diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc index aacd76af4af0586de2cd2c97b439d8c380eaeefc..0ae338c745ae3a890b3e36a86ebc012b0f04636f 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cu.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -98,7 +98,7 @@ PD_REGISTER_STRUCT_KERNEL(alltoall, ops::AllToAllOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index 70b7d70dc93b31b032bf80e9e41121eeb57c4848..c3eff905851e3fff741024d850cda95ef9ec3bcd 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -95,7 +95,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allgather, ops::CAllGatherOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc index 9be9674bb082bd1b5f6890f422521626d6da232b..277988b56916f8e682b8e67abd4adf20ef78fed5 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc @@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_max, ALL_LAYOUT, ops::CAllReduceMaxCUDAKernel, float, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif double, diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc index 8e45b7e43b2ed15b17bd8cf1a5198ef6ff613fe6..76d809cd234f03813fdea62f982757340c85e3f2 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc @@ -28,7 +28,7 @@ PD_REGISTER_STRUCT_KERNEL(c_allreduce_sum, ALL_LAYOUT, ops::CAllReduceSumCUDAKernel, float, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif double, diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index d0d3ebb1394cbbee971e070502bcc3d03a3681ec..e37657a1747dec1b3ccd14ea9b32188d7a636b76 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -100,7 +100,7 @@ PD_REGISTER_STRUCT_KERNEL(c_broadcast, int64_t, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index dc47c184c221db33b07ff79599dad1a54fe284a4..1760b6ea3909393c3d8f982de3a6ad5af1891108 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -137,7 +137,7 @@ PD_REGISTER_STRUCT_KERNEL(c_concat, double, int, int64_t, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/c_embedding_op.cu b/paddle/fluid/operators/collective/c_embedding_op.cu index 4861b5d26ab0f026563305dcda4fa32da1dd0409..758734ada66e83ee46dfc0476628eb8275d5accf 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.cu +++ b/paddle/fluid/operators/collective/c_embedding_op.cu @@ -239,7 +239,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding, ops::CEmbeddingCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { @@ -251,7 +251,7 @@ PD_REGISTER_STRUCT_KERNEL(c_embedding_grad, ops::CEmbeddingGradCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/c_identity_op.cu.cc b/paddle/fluid/operators/collective/c_identity_op.cu.cc index 3d5f16c218c8c5fa5840c5af00a08e9e1c871bd5..9571168db152c61c4cb12461406730ebfc8b27c9 100644 --- a/paddle/fluid/operators/collective/c_identity_op.cu.cc +++ b/paddle/fluid/operators/collective/c_identity_op.cu.cc @@ -25,7 +25,7 @@ PD_REGISTER_STRUCT_KERNEL(c_identity, double, int, int64_t, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 35053b1511fcc3707aaabba690d01b42eb08e5c6..edae8feb61257b9678724124cedb01a29fac78b7 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -87,7 +87,7 @@ PD_REGISTER_STRUCT_KERNEL(c_reducescatter, ops::CReduceScatterOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/c_split_op.cu b/paddle/fluid/operators/collective/c_split_op.cu index b01ed790e851715063312336b3eeae18b0382a26..0b3e2aaf781dbe227c646c2c2161d49b954d6829 100644 --- a/paddle/fluid/operators/collective/c_split_op.cu +++ b/paddle/fluid/operators/collective/c_split_op.cu @@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(c_split, double, int, int64_t, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc index b6af2dbd1c847ca8e347fe3ce99a5b0a6ffc2ccf..b4773a8eb54562f3bb6c6a85e39f31788002c0cc 100644 --- a/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/mp_allreduce_sum_op.cu.cc @@ -31,7 +31,7 @@ PD_REGISTER_STRUCT_KERNEL(mp_allreduce_sum, double, int, int64_t, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif plat::float16) { diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc index 2374f4a4aed8239053a4ccb51803377c0d75b596..d22fd70bd0f61846ec18eda7994ee2a31c9f2d70 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc @@ -108,7 +108,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_allgather, ops::PartialAllGatherOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/partial_recv_op.cu.cc b/paddle/fluid/operators/collective/partial_recv_op.cu.cc index b0df94194e4f87801b38a1a6df65236e8f9944a2..0c33ca7c25c3268db652356a2d78d8126dd53a5a 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cu.cc @@ -124,7 +124,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_recv, ops::PartialRecvOpCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/partial_send_op.cu.cc b/paddle/fluid/operators/collective/partial_send_op.cu.cc index dc24ea01fc98e96f59409f5a0628ba36642cc6c7..4f9fc41bc4e16fc1c8c243de7a329bebbcdc8324 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cu.cc @@ -123,7 +123,7 @@ PD_REGISTER_STRUCT_KERNEL(partial_send, ops::PartialSendCUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index bfa12f911946d4b3eb17c99ce75caba3ba436c64..28058aa4868cd688e7470e83fea90d403b19065a 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -238,7 +238,7 @@ PD_REGISTER_STRUCT_KERNEL(recv_v2, ops::RecvOpV2CUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index adea7db0b8088e61583f88c65a3b4f386177b5cd..a80dc1f91e45d55ac778e0f3a95050f299de30c6 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -223,7 +223,7 @@ PD_REGISTER_STRUCT_KERNEL(send_v2, ops::SendOpV2CUDAKernel, float, double, -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 plat::bfloat16, #endif int, diff --git a/paddle/fluid/platform/device/gpu/nccl_helper.h b/paddle/fluid/platform/device/gpu/nccl_helper.h index 8dd0639ce72f3bd30a43c7f5141852b2338ad4df..6afcd2eb7cd9720c7dfffdfc2625f26ba9910a16 100644 --- a/paddle/fluid/platform/device/gpu/nccl_helper.h +++ b/paddle/fluid/platform/device/gpu/nccl_helper.h @@ -59,7 +59,7 @@ inline ncclDataType_t ToNCCLDataType(framework::proto::VarType::Type type) { return ncclUint8; } else if (type == framework::proto::VarType::BOOL) { return ncclUint8; -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 } else if (type == framework::proto::VarType::BF16) { return ncclBfloat16; #endif @@ -86,7 +86,7 @@ inline ncclDataType_t ToNCCLDataType(phi::DataType type) { return ncclInt8; } else if (type == phi::DataType::BOOL) { return ncclUint8; -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 } else if (type == phi::DataType::BFLOAT16) { return ncclBfloat16; #endif diff --git a/paddle/phi/core/utils/data_type.h b/paddle/phi/core/utils/data_type.h index 16b73e0f2baa6738702971a89101df99ce68c99f..018672e45b5970c4cb26d73cb47a1bb48a2842b2 100644 --- a/paddle/phi/core/utils/data_type.h +++ b/paddle/phi/core/utils/data_type.h @@ -229,7 +229,7 @@ inline ncclDataType_t ToNCCLDataType(DataType type) { return ncclInt8; } else if (type == DataType::BOOL) { return ncclUint8; -#if NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 && CUDA_VERSION >= 11000 } else if (type == DataType::BFLOAT16) { return ncclBfloat16; #endif