"add python test case"

23cb8259 · Dong Zhihong · 73883bde · 23cb8259 · 23cb8259 · 23cb8259
5 changed file
--- a/paddle/operators/nccl/nccl_gpu_common.cc
+++ b/paddle/operators/nccl/nccl_gpu_common.cc
@@ -18,7 +18,7 @@ NCCLManager::~NCCLManager() {
      int idx = gid % gpus_.size();
      // wait finish
      PADDLE_ENFORCE(
-          cudaStreamWaitEvent(*comm->streams_[idx], comm->events_[idx], 0));
+          cudaStreamWaitEvent(comm->streams_[idx], comm->events_[idx], 0));
      PADDLE_ENFORCE(cudaEventDestroy(comm->events_[idx]));

--- a/paddle/operators/nccl/nccl_gpu_common.h
+++ b/paddle/operators/nccl/nccl_gpu_common.h
@@ -65,20 +65,10 @@ class WaitGroup {
  std::condition_variable cv_;
 };
-// class NCCLContext : public DeviceContext {
-// public:
-//   explicit NCCLContext(GPUPlace place);
-//   virtual ~NCCLContext();
-// private:
-//   std::vector<int> gpu_ids_;
-//   std::vector<cudaStream_t> streams_;
-// };
 // TODO(dzh) : make resources managed unified with framework
 struct Communicator {
  std::vector<ncclComm_t> comms_;
-  std::vector<cudaStream_t*> streams_;
+  std::vector<cudaStream_t> streams_;
  std::vector<cudaEvent_t> events_;
  std::vector<int> gpus_;
  WaitGroup wg_;

--- a/paddle/operators/nccl/nccl_ops.cc
+++ b/paddle/operators/nccl/nccl_ops.cc
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
 #include "paddle/operators/nccl/nccl_ops.h"
 namespace paddle {
@@ -9,54 +20,27 @@ class NCCLAllReduceOp : public framework::OperatorWithKernel {
  using framework::OperatorWithKernel::OperatorWithKernel;
 protected:
-  // allreduce do nothing in infershape
+  void InferShape(framework::InferShapeContext *ctx) const override {
-  void InferShape(const framework::InferShapeContext &ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
-    PADDLE_ENFORCE_NOT_NULL(
+                   " Input(X) of AllReduce op input should not be NULL");
-        ctx.InputVar("X"),
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   " Input(X) of AllReduce op input should not be NULL");
-    auto ins = ctx.MultiInput<framework::Tensor>("X");
-    auto outs = ctx.MultiOutput<framework::Tensor>("Out");
-    PADDLE_ENFORCE(ins.size() == outs.size(),
-                   "Input(X) and Output(Out) must have same size");
-    for (size_t i = 0; i < ins.size(); ++i) {
-      outs[i]->Resize(ins[i]->dims());
-    }
-    std::string reduction = ctx.Attr<std::string>("reduction");
-    PADDLE_ENFORCE((reduction == "ncclSum" || reduction == "ncclProd" ||
-                    reduction == "ncclMin" || reduction == "ncclMax"),
-                   "invalid reduction!");
-  }
-};
-// BcastSendOp
-template <typename T>
-class NCCLBcastSendOp final : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
- protected:
+    auto x_dims = ctx->GetInputsDim("X");
-  void InferShape(const framework::InferShapeContext &ctx) const override {
-    PADDLE_ENFORCE_NOT_NULL(
-        ctx.InputVar("X"),
-        " Input(X) of BcastSend op input should not be NULL");
-  }
-};
-// BcastRecvOp
+    std::string reduction = ctx->Attrs().Get<std::string>("reduction");
-template <typename T>
+    PADDLE_ENFORCE((reduction == "ncclSum" || reduction == "ncclProd" ||
-class NCCLBcastRecvOp final : public framework::OperatorWithKernel {
+                    reduction == "ncclMin" || reduction == "ncclMax"),
- public:
+                   "invalid reduction.");
-  using framework::OperatorWithKernel::OperatorWithKernel;
- protected:
+    ctx->SetOutputsDim("Out", x_dims);
-  void InferShape(const framework::InferShapeContext &ctx) const override {
+    ctx->ShareLoD("X", /*->*/ "Out");
-    PADDLE_ENFORCE_NOT_NULL(
-        ctx.OutputVar("Out"),
-        " Input(X) of BcastRecv op input should not be NULL");
  }
 };
+// AllreduceOp
 class NCCLAllReduceOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
  NCCLAllReduceOpMaker(framework::OpProto *proto,
                       framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
@@ -71,7 +55,9 @@ class NCCLAllReduceOpMaker : public framework::OpProtoAndCheckerMaker {
  }
 };
+// BcastSendOp
 class NCCLBcastSendOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
  NCCLAllReduceOpMaker(framework::OpProto *proto,
                       framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
@@ -82,7 +68,9 @@ class NCCLBcastSendOpMaker : public framework::OpProtoAndCheckerMaker {
  }
 };
+// BcastRecvOp
 class NCCLBcastRecvOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
  NCCLAllReduceOpMaker(framework::OpProto *proto,
                       framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
@@ -93,5 +81,9 @@ class NCCLBcastRecvOpMaker : public framework::OpProtoAndCheckerMaker {
  }
 };
-}  // operators
+}  // namespace operators
-}  // paddle
+}  // namespace paddle
+namespace ops = paddle::operators;
+REGISTER_OP_WITHOUT_GRADIENT(ncclAllReduce, ops::NCCLAllReduceOp,
+                             ops::NCCLAllReduceOpMaker);
--- a/paddle/operators/nccl/nccl_ops.cu
+++ b/paddle/operators/nccl/nccl_ops.cu
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#define EIGEN_USE_GPU
+#include "paddle/operators/nccl/nccl_ops.h"
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(ncclAllReduce, ops::NCCLAllReduceKernel<float>);
\ No newline at end of file
--- a/paddle/operators/nccl/nccl_ops.h
+++ b/paddle/operators/nccl/nccl_ops.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
 #pragma once
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/nccl/nccl_gpu_common.h"
@@ -14,11 +25,13 @@ class NCCLTypeWrapper;
 template <>
 class NCCLTypeWrapper<float> {
+ public:
  static const ncclDataType_t type = ncclFloat;
 };
 template <>
 class NCCLTypeWrapper<double> {
+ public:
  static const ncclDataType_t type = ncclDouble;
 };
@@ -49,10 +62,10 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
    auto* comm = m->GetCommunicator(gpus);
    comm->wg_.Add(1);
-    auto* stream = &dev_ctx.stream();
+    auto stream = dev_ctx.stream();
    // device id
-    int gid = ctx.GetPlace().GetDeviceId();
+    int gid = static_cast<platform::GPUPlace>(ctx.GetPlace()).GetDeviceId();
    int idx = gid % gpus.size();
    comm->streams_[idx] = stream;
@@ -60,9 +73,8 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
      PADDLE_ENFORCE(
          ncclAllReduce(ins[i]->data<T>(), outs[i]->mutable_data<T>(),
                        outs[i]->numel() * sizeof(T), NCCLTypeWrapper<T>::type,
-                        op_type, &comm->comms_[idx], comm->streams_[idx]));
+                        op_type, comm->comms_[idx], comm->streams_[idx]));
-      PADDLE_ENFORCE(
+      PADDLE_ENFORCE(cudaEventRecord(comm->events_[idx], comm->streams_[idx]));
-          cudaEventRecord(comm->events_[idx], *comms_->streams_[idx]));
      // wait finish
      PADDLE_ENFORCE(
@@ -71,8 +83,9 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
    comm->wg_.Done();
-    wg.Wait();
+    comm->wg_.Wait();
  }
 };
-}
-}
+}  // namespace operators
+}  // namespace paddle