diff --git a/paddle/fluid/operators/nccl_op_test.cu.cc b/paddle/fluid/operators/nccl_op_test.cu.cc
index 7659bb9edd45a98cdb582205714f6d4115d08c23..28f13c805230a9d91fef37e0ce0beff0dd6fdcc4 100644
--- a/paddle/fluid/operators/nccl_op_test.cu.cc
+++ b/paddle/fluid/operators/nccl_op_test.cu.cc
@@ -236,49 +236,51 @@ TEST_F(NCCLTester, ncclReduceOp) {
 }
 
 // ncclBcastOp with desc
-// TODO(helin): enable the test for ncclBcastOp
-// TEST_F(NCCLTester, ncclBcastOp) {
-//   std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
-//   const int kRoot = 0;
-//   op2->SetType("ncclBcast");
-//   op2->SetInput("X", {"st"});
-//   op2->SetInput("Communicator", {"comm"});
-//   op2->SetOutput("Out", {"rt"});
-//   op2->SetAttr("root", kRoot);
-
-//   std::vector<f::Scope*> dev_scopes;
-
-//   std::vector<std::thread> ths;
-
-//   for (size_t i = 0; i < gpu_list_.size(); ++i) {
-//     dev_scopes.emplace_back(&g_scope_.NewScope());
-//     std::thread th(&NCCLTester::PerThreadProgram<float>, this, gpu_list_[i],
-//                    *op2.get(), dev_scopes[i]);
-//     ths.emplace_back(std::move(th));
-//   }
-
-//   for (size_t i = 0; i < gpu_list_.size(); ++i) {
-//     ths[i].join();
-//   }
-
-//   const int idx = 1;
-//   float result = GetGPUData(kRoot);
-
-//   p::CPUPlace cpu_place;
-//   p::CUDAPlace gpu_place(gpu_list_[idx]);
-
-//   auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get<f::LoDTensor>();
-//   auto *rt = recv_tensor.data<float>();
-//   auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable<f::LoDTensor>();
-//   result_tensor->Resize(kDims);
-//   auto *ct = result_tensor->mutable_data<float>(cpu_place);
-
-//   paddle::memory::Copy(
-//       cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt,
-//       recv_tensor.numel() * sizeof(float),
-//       static_cast<p::CUDADeviceContext*>(dev_ctxs_[idx])->stream());
-
-//   for (int64_t j = 0; j < f::product(kDims); ++j) {
-//     ASSERT_NEAR(ct[j], result, 1e-5);
-//   }
-// }
+// TODO(helin): https://github.com/PaddlePaddle/Paddle/issues/9540
+/*
+TEST_F(NCCLTester, ncclBcastOp) {
+  std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
+  const int kRoot = 0;
+  op2->SetType("ncclBcast");
+  op2->SetInput("X", {"st"});
+  op2->SetInput("Communicator", {"comm"});
+  op2->SetOutput("Out", {"rt"});
+  op2->SetAttr("root", kRoot);
+
+  std::vector<f::Scope*> dev_scopes;
+
+  std::vector<std::thread> ths;
+
+  for (size_t i = 0; i < gpu_list_.size(); ++i) {
+    dev_scopes.emplace_back(&g_scope_.NewScope());
+    std::thread th(&NCCLTester::PerThreadProgram<float>, this, gpu_list_[i],
+                   *op2.get(), dev_scopes[i]);
+    ths.emplace_back(std::move(th));
+  }
+
+  for (size_t i = 0; i < gpu_list_.size(); ++i) {
+    ths[i].join();
+  }
+
+  const int idx = 1;
+  float result = GetGPUData(kRoot);
+
+  p::CPUPlace cpu_place;
+  p::CUDAPlace gpu_place(gpu_list_[idx]);
+
+  auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get<f::LoDTensor>();
+  auto *rt = recv_tensor.data<float>();
+  auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable<f::LoDTensor>();
+  result_tensor->Resize(kDims);
+  auto *ct = result_tensor->mutable_data<float>(cpu_place);
+
+  paddle::memory::Copy(
+      cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt,
+      recv_tensor.numel() * sizeof(float),
+      static_cast<p::CUDADeviceContext*>(dev_ctxs_[idx])->stream());
+
+  for (int64_t j = 0; j < f::product(kDims); ++j) {
+    ASSERT_NEAR(ct[j], result, 1e-5);
+  }
+}
+*/