From fd68357bbeeba92f9caa75af44fb9e1266118163 Mon Sep 17 00:00:00 2001
From: sandyhouse <lilong12@baidu.com>
Date: Thu, 3 Sep 2020 07:40:41 +0000
Subject: [PATCH] add send recv op

---
 .../fluid/operators/collective/c_recv_op.cc   | 65 ++++++++++++++++++
 .../operators/collective/c_recv_op.cu.cc      | 68 +++++++++++++++++++
 paddle/fluid/operators/collective/c_recv_op.h | 38 +++++++++++
 .../fluid/operators/collective/c_send_op.cc   | 65 ++++++++++++++++++
 .../operators/collective/c_send_op.cu.cc      | 68 +++++++++++++++++++
 paddle/fluid/operators/collective/c_send_op.h | 38 +++++++++++
 6 files changed, 342 insertions(+)
 create mode 100644 paddle/fluid/operators/collective/c_recv_op.cc
 create mode 100644 paddle/fluid/operators/collective/c_recv_op.cu.cc
 create mode 100644 paddle/fluid/operators/collective/c_recv_op.h
 create mode 100644 paddle/fluid/operators/collective/c_send_op.cc
 create mode 100644 paddle/fluid/operators/collective/c_send_op.cu.cc
 create mode 100644 paddle/fluid/operators/collective/c_send_op.h
diff --git a/paddle/fluid/operators/collective/c_recv_op.cc b/paddle/fluid/operators/collective/c_recv_op.cc
new file mode 100644
index 00000000000..af3599a7bda
--- /dev/null
+++ b/paddle/fluid/operators/collective/c_recv_op.cc
@@ -0,0 +1,80 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_recv_op.h"
+
+namespace paddle {
+namespace operators {
+
+// Operator definition for c_recv: receives a tensor from a peer rank into
+// the output variable "Out".
+class CRecvOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  // Only checks that "Out" is present. No dims are set here, so "Out" keeps
+  // whatever shape it already has.
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
+                      platform::errors::NotFound(
+                          "No Output(Out) found for c_recv operator."));
+  }
+
+ protected:
+  // c_recv declares no input variables, so the kernel data type is taken
+  // from the "dtype" attribute rather than looked up on a variable.
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    const auto dtype =
+        static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype"));
+    return framework::OpKernelType(dtype, ctx.GetPlace());
+  }
+};
+
+// Declares the output, attributes and documentation of the c_recv operator.
+class CRecvOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddOutput("Out", "(Tensor) tensor to receive.");
+    AddAttr<int>("ring_id", "(int default 0) nccl communication ring id.")
+        .SetDefault(0);
+    AddAttr<int>("peer", "(int default 0) rank id for sender.").SetDefault(0);
+    AddAttr<int>("dtype", "(int default FP32) data type of the tensor.")
+        .SetDefault(static_cast<int>(framework::proto::VarType::FP32));
+    AddAttr<bool>(
+        "use_calc_stream",
+        "(bool default false) eject CUDA operations to calculation stream.")
+        .SetDefault(false);
+    AddComment(R"DOC(
+CRecv Operator
+
+Reference: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/p2p.html#sendrecv
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_WITHOUT_GRADIENT(c_recv, ops::CRecvOp, ops::CRecvOpMaker);
+
+// Register CRecvOpCPUKernel for each element type handled by the op.
+REGISTER_OP_CPU_KERNEL(c_recv, ops::CRecvOpCPUKernel<float>,
+                       ops::CRecvOpCPUKernel<double>,
+                       ops::CRecvOpCPUKernel<int>,
+                       ops::CRecvOpCPUKernel<int64_t>,
+                       ops::CRecvOpCPUKernel<plat::float16>);
diff --git a/paddle/fluid/operators/collective/c_recv_op.cu.cc b/paddle/fluid/operators/collective/c_recv_op.cu.cc
new file mode 100644
index 00000000000..ea6a612b053
--- /dev/null
+++ b/paddle/fluid/operators/collective/c_recv_op.cu.cc
@@ -0,0 +1,75 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_recv_op.h"
+
+#if defined(PADDLE_WITH_NCCL)
+#include "paddle/fluid/platform/collective_helper.h"
+#include "paddle/fluid/platform/nccl_helper.h"
+#endif
+
+namespace paddle {
+namespace operators {
+
+// CUDA kernel for c_recv: issues an NCCL receive that fills "Out" with data
+// coming from rank "peer" on the communicator selected by "ring_id".
+template <typename T>
+class CRecvOpCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+#if defined(PADDLE_WITH_NCCL)
+    auto out = ctx.Output<framework::LoDTensor>("Out");
+    // Keep the element count 64-bit so large tensors are not truncated.
+    const int64_t numel = out->numel();
+    ncclDataType_t dtype = platform::ToNCCLDataType(out->type());
+
+    int rid = ctx.Attr<int>("ring_id");
+    auto place = ctx.GetPlace();
+    auto comm = platform::NCCLCommContext::Instance().Get(rid, place);
+
+    // Use the calculation stream when "use_calc_stream" is set, otherwise
+    // the communicator's own stream.
+    cudaStream_t stream = nullptr;
+    if (ctx.Attr<bool>("use_calc_stream")) {
+      auto dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+      stream = static_cast<platform::CUDADeviceContext*>(dev_ctx)->stream();
+    } else {
+      stream = comm->stream();
+    }
+
+    int peer = ctx.Attr<int>("peer");
+    // This is the receiving side of the pair, so it must call ncclRecv; the
+    // writable buffer of "Out" on `place` is the destination.
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclRecv(
+        out->mutable_data<T>(place), numel, dtype, peer, comm->comm(), stream));
+    VLOG(3) << "rank " << comm->rank() << " recv "
+            << framework::product(out->dims()) << " from " << peer;
+#else
+    PADDLE_THROW(
+        platform::errors::Unavailable("PaddlePaddle should compile with GPU."));
+#endif
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_CUDA_KERNEL(c_recv, ops::CRecvOpCUDAKernel<float>,
+                        ops::CRecvOpCUDAKernel<double>,
+                        ops::CRecvOpCUDAKernel<int>,
+                        ops::CRecvOpCUDAKernel<int64_t>,
+                        ops::CRecvOpCUDAKernel<plat::float16>);
diff --git a/paddle/fluid/operators/collective/c_recv_op.h b/paddle/fluid/operators/collective/c_recv_op.h
new file mode 100644
index 00000000000..ad0f633b5c2
--- /dev/null
+++ b/paddle/fluid/operators/collective/c_recv_op.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+// CPU kernel for the c_recv operator. It performs no communication: every
+// invocation raises an Unavailable error.
+template <typename T>
+class CRecvOpCPUKernel : public framework::OpKernel<T> {
+ public:
+  // Always throws; `ctx` is not inspected.
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Do not support recv for cpu kernel now."));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/collective/c_send_op.cc b/paddle/fluid/operators/collective/c_send_op.cc
new file mode 100644
index 00000000000..54c4b86bbdb
--- /dev/null
+++ b/paddle/fluid/operators/collective/c_send_op.cc
@@ -0,0 +1,75 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_send_op.h"
+
+namespace paddle {
+namespace operators {
+
+// Operator definition for c_send: sends the input tensor "X" to a peer rank.
+class CSendOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  // c_send declares no outputs, so there is no shape to infer; only the
+  // presence of "X" is validated.
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
+                      platform::errors::NotFound(
+                          "No Input(X) found for c_send operator."));
+  }
+
+ protected:
+  // Selects the kernel from the data type of "X" and the execution place.
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
+  }
+};
+
+// Declares the input, attributes and documentation of the c_send operator.
+class CSendOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X", "(Tensor) tensor to be sent.");
+    AddAttr<int>("ring_id", "(int default 0) nccl communication ring id.")
+        .SetDefault(0);
+    AddAttr<int>("peer", "(int default 0) rank id for receiver.").SetDefault(0);
+    AddAttr<bool>(
+        "use_calc_stream",
+        "(bool default false) eject CUDA operations to calculation stream.")
+        .SetDefault(false);
+    AddComment(R"DOC(
+CSend Operator
+
+Reference: https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/usage/p2p.html#sendrecv
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_WITHOUT_GRADIENT(c_send, ops::CSendOp, ops::CSendOpMaker);
+
+// Register CSendOpCPUKernel for each element type handled by the op.
+REGISTER_OP_CPU_KERNEL(c_send, ops::CSendOpCPUKernel<float>,
+                       ops::CSendOpCPUKernel<double>,
+                       ops::CSendOpCPUKernel<int>,
+                       ops::CSendOpCPUKernel<int64_t>,
+                       ops::CSendOpCPUKernel<plat::float16>);
diff --git a/paddle/fluid/operators/collective/c_send_op.cu.cc b/paddle/fluid/operators/collective/c_send_op.cu.cc
new file mode 100644
index 00000000000..97b62849ca6
--- /dev/null
+++ b/paddle/fluid/operators/collective/c_send_op.cu.cc
@@ -0,0 +1,73 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/collective/c_send_op.h"
+
+#if defined(PADDLE_WITH_NCCL)
+#include "paddle/fluid/platform/collective_helper.h"
+#include "paddle/fluid/platform/nccl_helper.h"
+#endif
+
+namespace paddle {
+namespace operators {
+
+// CUDA kernel for c_send: issues an NCCL send of the input tensor "X" to
+// rank "peer" on the communicator selected by "ring_id".
+template <typename T>
+class CSendOpCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+#if defined(PADDLE_WITH_NCCL)
+    auto x = ctx.Input<framework::LoDTensor>("X");
+    // Keep the element count 64-bit so large tensors are not truncated.
+    const int64_t numel = x->numel();
+    ncclDataType_t dtype = platform::ToNCCLDataType(x->type());
+
+    int rid = ctx.Attr<int>("ring_id");
+    auto place = ctx.GetPlace();
+    auto comm = platform::NCCLCommContext::Instance().Get(rid, place);
+
+    // Use the calculation stream when "use_calc_stream" is set, otherwise
+    // the communicator's own stream.
+    cudaStream_t stream = nullptr;
+    if (ctx.Attr<bool>("use_calc_stream")) {
+      auto dev_ctx = platform::DeviceContextPool::Instance().Get(place);
+      stream = static_cast<platform::CUDADeviceContext*>(dev_ctx)->stream();
+    } else {
+      stream = comm->stream();
+    }
+
+    int peer = ctx.Attr<int>("peer");
+    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclSend(
+        x->data<T>(), numel, dtype, peer, comm->comm(), stream));
+    VLOG(3) << "rank " << comm->rank() << " send "
+            << framework::product(x->dims()) << " to " << peer;
+#else
+    PADDLE_THROW(
+        platform::errors::Unavailable("PaddlePaddle should compile with GPU."));
+#endif
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
+REGISTER_OP_CUDA_KERNEL(c_send, ops::CSendOpCUDAKernel<float>,
+                        ops::CSendOpCUDAKernel<double>,
+                        ops::CSendOpCUDAKernel<int>,
+                        ops::CSendOpCUDAKernel<int64_t>,
+                        ops::CSendOpCUDAKernel<plat::float16>);
diff --git a/paddle/fluid/operators/collective/c_send_op.h b/paddle/fluid/operators/collective/c_send_op.h
new file mode 100644
index 00000000000..5a711ba10d2
--- /dev/null
+++ b/paddle/fluid/operators/collective/c_send_op.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+// CPU kernel for the c_send operator. It performs no communication: every
+// invocation raises an Unavailable error.
+template <typename T>
+class CSendOpCPUKernel : public framework::OpKernel<T> {
+ public:
+  // Always throws; `ctx` is not inspected.
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    PADDLE_THROW(platform::errors::Unavailable(
+        "Do not support send for cpu kernel now."));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
-- 
GitLab