From 73473ac26c03395cfaaa3087c4fa82ad318266e5 Mon Sep 17 00:00:00 2001 From: Shaojie WANG Date: Sat, 8 Apr 2023 19:17:54 -0700 Subject: [PATCH] register bf16 for c ops (#52641) --- paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc | 3 +++ paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc | 2 +- paddle/fluid/operators/collective/c_concat_op.cu.cc | 3 +++ paddle/fluid/operators/collective/c_embedding_op.cu | 2 ++ paddle/fluid/operators/collective/c_identity_op.cu.cc | 3 +++ paddle/fluid/operators/collective/c_split_op.cu | 3 +++ paddle/fluid/operators/collective/partial_allgather_op.cu.cc | 3 +++ paddle/fluid/operators/collective/partial_recv_op.cu.cc | 3 +++ paddle/fluid/operators/collective/partial_send_op.cu.cc | 3 +++ paddle/fluid/operators/collective/recv_v2_op.cu.cc | 2 +- paddle/fluid/operators/collective/send_v2_op.cu.cc | 2 +- 11 files changed, 26 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc index fb073c8173d..3726ff8ac90 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc @@ -20,6 +20,9 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL( c_allreduce_max, ops::CAllReduceOpCUDAKernel, +#if NCCL_VERSION_CODE >= 21000 + ops::CAllReduceOpCUDAKernel, +#endif ops::CAllReduceOpCUDAKernel, ops::CAllReduceOpCUDAKernel, ops::CAllReduceOpCUDAKernel, diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc index 2072f4c0cb3..5e00c3bdba3 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc @@ -20,7 +20,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL( c_allreduce_sum, ops::CAllReduceOpCUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::CAllReduceOpCUDAKernel, #endif ops::CAllReduceOpCUDAKernel, diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index 74bdd2b63ae..1abe8a51b83 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -132,6 +132,9 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(c_concat, ops::CConcatOpCUDAKernel, ops::CConcatOpCUDAKernel, +#if NCCL_VERSION_CODE >= 21000 + ops::CConcatOpCUDAKernel, +#endif ops::CConcatOpCUDAKernel, ops::CConcatOpCUDAKernel, ops::CConcatOpCUDAKernel); diff --git a/paddle/fluid/operators/collective/c_embedding_op.cu b/paddle/fluid/operators/collective/c_embedding_op.cu index cddbd162571..04839660b09 100644 --- a/paddle/fluid/operators/collective/c_embedding_op.cu +++ b/paddle/fluid/operators/collective/c_embedding_op.cu @@ -253,8 +253,10 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(c_embedding, ops::CEmbeddingCUDAKernel, ops::CEmbeddingCUDAKernel, + ops::CEmbeddingCUDAKernel, ops::CEmbeddingCUDAKernel); REGISTER_OP_CUDA_KERNEL(c_embedding_grad, ops::CEmbeddingGradCUDAKernel, ops::CEmbeddingGradCUDAKernel, + ops::CEmbeddingGradCUDAKernel, ops::CEmbeddingGradCUDAKernel); diff --git a/paddle/fluid/operators/collective/c_identity_op.cu.cc b/paddle/fluid/operators/collective/c_identity_op.cu.cc index 0b2f5b7eb1a..f8ddb6ab010 100644 --- a/paddle/fluid/operators/collective/c_identity_op.cu.cc +++ b/paddle/fluid/operators/collective/c_identity_op.cu.cc @@ -19,6 +19,9 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(c_identity, ops::CIdentityOpKernel, +#if NCCL_VERSION_CODE >= 21000 + ops::CIdentityOpKernel, +#endif ops::CIdentityOpKernel, ops::CIdentityOpKernel, ops::CIdentityOpKernel, diff --git a/paddle/fluid/operators/collective/c_split_op.cu b/paddle/fluid/operators/collective/c_split_op.cu index 8bf887b954a..e3dfbb3a5f9 100644 --- a/paddle/fluid/operators/collective/c_split_op.cu +++ b/paddle/fluid/operators/collective/c_split_op.cu @@ -117,6 +117,9 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(c_split, ops::CSplitOpCUDAKernel, +#if NCCL_VERSION_CODE >= 21000 + ops::CSplitOpCUDAKernel, +#endif ops::CSplitOpCUDAKernel, ops::CSplitOpCUDAKernel, ops::CSplitOpCUDAKernel, diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc index 6bc18254737..d0b131c8d76 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc @@ -110,6 +110,9 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(partial_allgather, ops::PartialAllGatherOpCUDAKernel, ops::PartialAllGatherOpCUDAKernel, +#if NCCL_VERSION_CODE >= 21000 + ops::PartialAllGatherOpCUDAKernel, +#endif ops::PartialAllGatherOpCUDAKernel, ops::PartialAllGatherOpCUDAKernel, ops::PartialAllGatherOpCUDAKernel); diff --git a/paddle/fluid/operators/collective/partial_recv_op.cu.cc b/paddle/fluid/operators/collective/partial_recv_op.cu.cc index ad44ce6a109..a9a98d3f3ee 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cu.cc @@ -121,6 +121,9 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(partial_recv, ops::PartialRecvOpCUDAKernel, ops::PartialRecvOpCUDAKernel, +#if NCCL_VERSION_CODE >= 21000 + ops::PartialRecvOpCUDAKernel, +#endif ops::PartialRecvOpCUDAKernel, ops::PartialRecvOpCUDAKernel, ops::PartialRecvOpCUDAKernel); diff --git a/paddle/fluid/operators/collective/partial_send_op.cu.cc b/paddle/fluid/operators/collective/partial_send_op.cu.cc index fb49318c012..6d99e246c9d 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cu.cc @@ -120,6 +120,9 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(partial_send, ops::PartialSendCUDAKernel, ops::PartialSendCUDAKernel, +#if NCCL_VERSION_CODE >= 21000 + ops::PartialSendCUDAKernel, +#endif ops::PartialSendCUDAKernel, ops::PartialSendCUDAKernel, ops::PartialSendCUDAKernel); diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index ec18a172e1f..2cbdae8b73d 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -236,7 +236,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(recv_v2, ops::RecvOpV2CUDAKernel, ops::RecvOpV2CUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::RecvOpV2CUDAKernel, #endif ops::RecvOpV2CUDAKernel, diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index 37b18703031..299f0595713 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -222,7 +222,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(send_v2, ops::SendOpV2CUDAKernel, ops::SendOpV2CUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::SendOpV2CUDAKernel, #endif ops::SendOpV2CUDAKernel, -- GitLab