Commit 9fbe90ef authored by Helin Wang

fix according to comments

Parent 9365d110
@@ -236,49 +236,51 @@ TEST_F(NCCLTester, ncclReduceOp) {
 }
 // ncclBcastOp with desc
-// TODO(helin): enable the test for ncclBcastOp
-// TEST_F(NCCLTester, ncclBcastOp) {
-//   std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
-//   const int kRoot = 0;
-//   op2->SetType("ncclBcast");
-//   op2->SetInput("X", {"st"});
-//   op2->SetInput("Communicator", {"comm"});
-//   op2->SetOutput("Out", {"rt"});
-//   op2->SetAttr("root", kRoot);
-
-//   std::vector<f::Scope *> dev_scopes;
-
-//   std::vector<std::thread> ths;
-
-//   for (size_t i = 0; i < gpu_list_.size(); ++i) {
-//     dev_scopes.emplace_back(&g_scope_.NewScope());
-//     std::thread th(&NCCLTester::PerThreadProgram<float>, this, gpu_list_[i],
-//                    *op2.get(), dev_scopes[i]);
-//     ths.emplace_back(std::move(th));
-//   }
-
-//   for (size_t i = 0; i < gpu_list_.size(); ++i) {
-//     ths[i].join();
-//   }
-
-//   const int idx = 1;
-//   float result = GetGPUData(kRoot);
-
-//   p::CPUPlace cpu_place;
-//   p::CUDAPlace gpu_place(gpu_list_[idx]);
-
-//   auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get<f::LoDTensor>();
-//   auto *rt = recv_tensor.data<float>();
-//   auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable<f::LoDTensor>();
-//   result_tensor->Resize(kDims);
-//   auto *ct = result_tensor->mutable_data<float>(cpu_place);
-
-//   paddle::memory::Copy(
-//       cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt,
-//       recv_tensor.numel() * sizeof(float),
-//       static_cast<p::CUDADeviceContext *>(dev_ctxs_[idx])->stream());
-
-//   for (int64_t j = 0; j < f::product(kDims); ++j) {
-//     ASSERT_NEAR(ct[j], result, 1e-5);
-//   }
-// }
+// TODO(helin): https://github.com/PaddlePaddle/Paddle/issues/9540
+/*
+TEST_F(NCCLTester, ncclBcastOp) {
+  std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
+  const int kRoot = 0;
+  op2->SetType("ncclBcast");
+  op2->SetInput("X", {"st"});
+  op2->SetInput("Communicator", {"comm"});
+  op2->SetOutput("Out", {"rt"});
+  op2->SetAttr("root", kRoot);
+
+  std::vector<f::Scope *> dev_scopes;
+
+  std::vector<std::thread> ths;
+
+  for (size_t i = 0; i < gpu_list_.size(); ++i) {
+    dev_scopes.emplace_back(&g_scope_.NewScope());
+    std::thread th(&NCCLTester::PerThreadProgram<float>, this, gpu_list_[i],
+                   *op2.get(), dev_scopes[i]);
+    ths.emplace_back(std::move(th));
+  }
+
+  for (size_t i = 0; i < gpu_list_.size(); ++i) {
+    ths[i].join();
+  }
+
+  const int idx = 1;
+  float result = GetGPUData(kRoot);
+
+  p::CPUPlace cpu_place;
+  p::CUDAPlace gpu_place(gpu_list_[idx]);
+
+  auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get<f::LoDTensor>();
+  auto *rt = recv_tensor.data<float>();
+  auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable<f::LoDTensor>();
+  result_tensor->Resize(kDims);
+  auto *ct = result_tensor->mutable_data<float>(cpu_place);
+
+  paddle::memory::Copy(
+      cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt,
+      recv_tensor.numel() * sizeof(float),
+      static_cast<p::CUDADeviceContext *>(dev_ctxs_[idx])->stream());
+
+  for (int64_t j = 0; j < f::product(kDims); ++j) {
+    ASSERT_NEAR(ct[j], result, 1e-5);
+  }
+}
+*/
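
An alternative way to park a test like this, not used in this commit, is GoogleTest's built-in DISABLED_ prefix: the test still compiles but is skipped in normal runs, and it can be executed on demand by passing --gtest_also_run_disabled_tests to the test binary. A minimal sketch of that pattern follows; the fixture name BcastSketchTest is a hypothetical stand-in (the real test uses the NCCLTester fixture above), and the binary is assumed to link against gtest_main.

#include <gtest/gtest.h>

// Hypothetical stand-in fixture, for illustration only; the real test relies
// on the NCCLTester fixture defined in nccl_op_test.cu.cc.
class BcastSketchTest : public ::testing::Test {};

// The DISABLED_ prefix keeps the test compiled but excluded from normal runs.
// Run it explicitly by passing --gtest_also_run_disabled_tests to the binary.
TEST_F(BcastSketchTest, DISABLED_ncclBcastOp) {
  SUCCEED();  // placeholder body; the real body is the code shown in the diff
}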