diff --git a/paddle/fluid/operators/nccl_op_test.cu.cc b/paddle/fluid/operators/nccl_op_test.cu.cc index 90f6f955cea51ded2dbb2bde459113458d7749a4..7659bb9edd45a98cdb582205714f6d4115d08c23 100644 --- a/paddle/fluid/operators/nccl_op_test.cu.cc +++ b/paddle/fluid/operators/nccl_op_test.cu.cc @@ -236,48 +236,49 @@ TEST_F(NCCLTester, ncclReduceOp) { } // ncclBcastOp with desc -TEST_F(NCCLTester, ncclBcastOp) { - std::unique_ptr op2(new f::OpDesc); - const int kRoot = 0; - op2->SetType("ncclBcast"); - op2->SetInput("X", {"st"}); - op2->SetInput("Communicator", {"comm"}); - op2->SetOutput("Out", {"rt"}); - op2->SetAttr("root", kRoot); - - std::vector dev_scopes; - - std::vector ths; - - for (size_t i = 0; i < gpu_list_.size(); ++i) { - dev_scopes.emplace_back(&g_scope_.NewScope()); - std::thread th(&NCCLTester::PerThreadProgram, this, gpu_list_[i], - *op2.get(), dev_scopes[i]); - ths.emplace_back(std::move(th)); - } - - for (size_t i = 0; i < gpu_list_.size(); ++i) { - ths[i].join(); - } - - const int idx = 1; - float result = GetGPUData(kRoot); - - p::CPUPlace cpu_place; - p::CUDAPlace gpu_place(gpu_list_[idx]); - - auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get(); - auto *rt = recv_tensor.data(); - auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable(); - result_tensor->Resize(kDims); - auto *ct = result_tensor->mutable_data(cpu_place); - - paddle::memory::Copy( - cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt, - recv_tensor.numel() * sizeof(float), - static_cast(dev_ctxs_[idx])->stream()); - - for (int64_t j = 0; j < f::product(kDims); ++j) { - ASSERT_NEAR(ct[j], result, 1e-5); - } -} +// TODO(helin): enable the test for ncclBcastOp +// TEST_F(NCCLTester, ncclBcastOp) { +// std::unique_ptr op2(new f::OpDesc); +// const int kRoot = 0; +// op2->SetType("ncclBcast"); +// op2->SetInput("X", {"st"}); +// op2->SetInput("Communicator", {"comm"}); +// op2->SetOutput("Out", {"rt"}); +// op2->SetAttr("root", kRoot); + +// std::vector dev_scopes; + +// std::vector ths; + +// for (size_t i = 0; i < gpu_list_.size(); ++i) { +// dev_scopes.emplace_back(&g_scope_.NewScope()); +// std::thread th(&NCCLTester::PerThreadProgram, this, gpu_list_[i], +// *op2.get(), dev_scopes[i]); +// ths.emplace_back(std::move(th)); +// } + +// for (size_t i = 0; i < gpu_list_.size(); ++i) { +// ths[i].join(); +// } + +// const int idx = 1; +// float result = GetGPUData(kRoot); + +// p::CPUPlace cpu_place; +// p::CUDAPlace gpu_place(gpu_list_[idx]); + +// auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get(); +// auto *rt = recv_tensor.data(); +// auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable(); +// result_tensor->Resize(kDims); +// auto *ct = result_tensor->mutable_data(cpu_place); + +// paddle::memory::Copy( +// cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt, +// recv_tensor.numel() * sizeof(float), +// static_cast(dev_ctxs_[idx])->stream()); + +// for (int64_t j = 0; j < f::product(kDims); ++j) { +// ASSERT_NEAR(ct[j], result, 1e-5); +// } +// }