From eac973d1b7817d23dac52546c6061f1a2dc03396 Mon Sep 17 00:00:00 2001 From: LiYuRio <63526175+LiYuRio@users.noreply.github.com> Date: Fri, 4 Nov 2022 15:38:01 +0800 Subject: [PATCH] forbid backward for comm (#47636) --- .../fluid/operators/collective/alltoall_op.cc | 20 +---------------- .../operators/collective/alltoall_op.cu.cc | 2 +- .../operators/collective/c_allgather_op.cc | 22 +++---------------- .../operators/collective/c_allgather_op.cu.cc | 2 +- .../collective/c_allreduce_max_op.cc | 11 ++++------ .../collective/c_allreduce_min_op.cc | 11 ++++------ .../collective/c_allreduce_prod_op.cc | 11 ++++------ .../collective/c_allreduce_sum_op.cu.cc | 2 +- .../operators/collective/c_broadcast_op.cu.cc | 2 +- .../collective/c_reducescatter_op.cc | 20 +++-------------- .../collective/c_reducescatter_op.cu.cc | 2 +- .../operators/collective/recv_v2_op.cu.cc | 2 +- .../operators/collective/send_v2_op.cu.cc | 2 +- 13 files changed, 26 insertions(+), 83 deletions(-) diff --git a/paddle/fluid/operators/collective/alltoall_op.cc b/paddle/fluid/operators/collective/alltoall_op.cc index fa7476e07c..b5512fdc52 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cc @@ -61,31 +61,13 @@ Scatter tensors from all participators to all participators. } }; -template -class AllToAllOpGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr retv) const override { - retv->SetType("alltoall"); - retv->SetInput("X", this->OutputGrad("Out")); - retv->SetOutput("Out", this->InputGrad("X")); - retv->SetAttrMap(this->Attrs()); - } -}; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OPERATOR(alltoall, - ops::AllToAllOp, - ops::AllToAllOpMaker, - ops::AllToAllOpGradMaker, - ops::AllToAllOpGradMaker) +REGISTER_OP_WITHOUT_GRADIENT(alltoall, ops::AllToAllOp, ops::AllToAllOpMaker) REGISTER_OP_CPU_KERNEL(alltoall, ops::AllToAllOpCPUKernel, diff --git a/paddle/fluid/operators/collective/alltoall_op.cu.cc b/paddle/fluid/operators/collective/alltoall_op.cu.cc index e770f2e792..e50d14e5ef 100644 --- a/paddle/fluid/operators/collective/alltoall_op.cu.cc +++ b/paddle/fluid/operators/collective/alltoall_op.cu.cc @@ -95,7 +95,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(alltoall, ops::AllToAllOpCUDAKernel, ops::AllToAllOpCUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::AllToAllOpCUDAKernel, #endif ops::AllToAllOpCUDAKernel, diff --git a/paddle/fluid/operators/collective/c_allgather_op.cc b/paddle/fluid/operators/collective/c_allgather_op.cc index bf45895670..83449de910 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cc @@ -63,31 +63,15 @@ reference: https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/us } }; -template -class CAllGatherOpGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr retv) const override { - retv->SetType("c_reducescatter"); - retv->SetInput("X", this->OutputGrad("Out")); - retv->SetOutput("Out", this->InputGrad("X")); - retv->SetAttrMap(this->Attrs()); - } -}; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OPERATOR(c_allgather, - ops::CAllGatherOp, - ops::CAllGatherOpGradMaker, - ops::CAllGatherOpGradMaker, - ops::CAllGatherOpMaker); +REGISTER_OP_WITHOUT_GRADIENT(c_allgather, + ops::CAllGatherOp, + ops::CAllGatherOpMaker); REGISTER_OP_CPU_KERNEL(c_allgather, ops::CAllGatherOpCPUKernel, diff --git a/paddle/fluid/operators/collective/c_allgather_op.cu.cc b/paddle/fluid/operators/collective/c_allgather_op.cu.cc index 963eda0723..ddef85d73e 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cu.cc @@ -96,7 +96,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(c_allgather, ops::CAllGatherOpCUDAKernel, ops::CAllGatherOpCUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::CAllGatherOpCUDAKernel, #endif ops::CAllGatherOpCUDAKernel, diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cc index d5b2279f30..d339a51368 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cc @@ -41,13 +41,10 @@ DECLARE_INPLACE_OP_INFERER(AllreduceMaxInplaceInferer, {"X", "Out"}); namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OPERATOR( - c_allreduce_max, - ops::CAllReduceOp, - ops::CAllReduceMaxOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - ops::AllreduceMaxInplaceInferer) +REGISTER_OP_WITHOUT_GRADIENT(c_allreduce_max, + ops::CAllReduceOp, + ops::CAllReduceMaxOpMaker, + ops::AllreduceMaxInplaceInferer) REGISTER_OP_CPU_KERNEL(c_allreduce_max, ops::CAllReduceOpCPUKernel, diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cc index 3babe86961..8c1dd0172a 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cc @@ -41,13 +41,10 @@ DECLARE_INPLACE_OP_INFERER(AllreduceMinInplaceInferer, {"X", "Out"}); namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OPERATOR( - c_allreduce_min, - ops::CAllReduceOp, - ops::CAllReduceMinOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - ops::AllreduceMinInplaceInferer) +REGISTER_OP_WITHOUT_GRADIENT(c_allreduce_min, + ops::CAllReduceOp, + ops::CAllReduceMinOpMaker, + ops::AllreduceMinInplaceInferer) REGISTER_OP_CPU_KERNEL(c_allreduce_min, ops::CAllReduceOpCPUKernel, diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc index c2c381a5a7..1d07b8226a 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc @@ -41,13 +41,10 @@ DECLARE_INPLACE_OP_INFERER(AllreduceProdInplaceInferer, {"X", "Out"}); namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OPERATOR( - c_allreduce_prod, - ops::CAllReduceOp, - ops::CAllReduceProdOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - ops::AllreduceProdInplaceInferer) +REGISTER_OP_WITHOUT_GRADIENT(c_allreduce_prod, + ops::CAllReduceOp, + ops::CAllReduceProdOpMaker, + ops::AllreduceProdInplaceInferer) REGISTER_OP_CPU_KERNEL(c_allreduce_prod, ops::CAllReduceOpCPUKernel, diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc index 2072f4c0cb..5e00c3bdba 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc @@ -20,7 +20,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL( c_allreduce_sum, ops::CAllReduceOpCUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::CAllReduceOpCUDAKernel, #endif ops::CAllReduceOpCUDAKernel, diff --git a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc index 3fe0065474..78fb50ce31 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.cu.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op.cu.cc @@ -108,7 +108,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(c_broadcast, ops::CBroadcastOpCUDAKernel, ops::CBroadcastOpCUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::CBroadcastOpCUDAKernel, #endif ops::CBroadcastOpCUDAKernel, diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cc index e1588235c7..0d8bda87e8 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cc @@ -66,29 +66,15 @@ Reference: https://docs.nvidia.com/deeplearning/sdk/nccl-developer-guide/docs/us } }; -template -class CReduceScatterOpGradMaker : public framework::SingleGradOpMaker { - public: - using framework::SingleGradOpMaker::SingleGradOpMaker; - - protected: - void Apply(GradOpPtr retv) const override { - retv->SetType("c_allgather"); - retv->SetInput("X", this->OutputGrad("Out")); - retv->SetOutput("Out", this->InputGrad("X")); - retv->SetAttrMap(this->Attrs()); - } -}; - } // namespace operators } // namespace paddle namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OPERATOR(c_reducescatter, - ops::CReduceScatterOp, - ops::CReduceScatterOpMaker); +REGISTER_OP_WITHOUT_GRADIENT(c_reducescatter, + ops::CReduceScatterOp, + ops::CReduceScatterOpMaker); REGISTER_OP_CPU_KERNEL(c_reducescatter, ops::CReduceScatterOpCPUKernel, diff --git a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc index 9495ba44ca..b4eba9d124 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op.cu.cc @@ -84,7 +84,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(c_reducescatter, ops::CReduceScatterOpCUDAKernel, ops::CReduceScatterOpCUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::CReduceScatterOpCUDAKernel, #endif ops::CReduceScatterOpCUDAKernel, diff --git a/paddle/fluid/operators/collective/recv_v2_op.cu.cc b/paddle/fluid/operators/collective/recv_v2_op.cu.cc index 7644266307..06e06a79c6 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cu.cc @@ -236,7 +236,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(recv_v2, ops::RecvOpV2CUDAKernel, ops::RecvOpV2CUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::RecvOpV2CUDAKernel, #endif ops::RecvOpV2CUDAKernel, diff --git a/paddle/fluid/operators/collective/send_v2_op.cu.cc b/paddle/fluid/operators/collective/send_v2_op.cu.cc index c6f4395e3b..c7ab3c749b 100644 --- a/paddle/fluid/operators/collective/send_v2_op.cu.cc +++ b/paddle/fluid/operators/collective/send_v2_op.cu.cc @@ -221,7 +221,7 @@ namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL(send_v2, ops::SendOpV2CUDAKernel, ops::SendOpV2CUDAKernel, -#if CUDNN_VERSION_MIN(8, 1, 0) && NCCL_VERSION_CODE >= 21000 +#if NCCL_VERSION_CODE >= 21000 ops::SendOpV2CUDAKernel, #endif ops::SendOpV2CUDAKernel, -- GitLab