Commit 9fbe90ef authored by Helin Wang

fix according to comments

Parent 9365d110
@@ -236,49 +236,51 @@ TEST_F(NCCLTester, ncclReduceOp) {
 }
 // ncclBcastOp with desc
-// TODO(helin): enable the test for ncclBcastOp
-// TEST_F(NCCLTester, ncclBcastOp) {
-//   std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
-//   const int kRoot = 0;
-//   op2->SetType("ncclBcast");
-//   op2->SetInput("X", {"st"});
-//   op2->SetInput("Communicator", {"comm"});
-//   op2->SetOutput("Out", {"rt"});
-//   op2->SetAttr("root", kRoot);
-
-//   std::vector<f::Scope *> dev_scopes;
-
-//   std::vector<std::thread> ths;
-
-//   for (size_t i = 0; i < gpu_list_.size(); ++i) {
-//     dev_scopes.emplace_back(&g_scope_.NewScope());
-//     std::thread th(&NCCLTester::PerThreadProgram<float>, this, gpu_list_[i],
-//                    *op2.get(), dev_scopes[i]);
-//     ths.emplace_back(std::move(th));
-//   }
-
-//   for (size_t i = 0; i < gpu_list_.size(); ++i) {
-//     ths[i].join();
-//   }
-
-//   const int idx = 1;
-//   float result = GetGPUData(kRoot);
-
-//   p::CPUPlace cpu_place;
-//   p::CUDAPlace gpu_place(gpu_list_[idx]);
-
-//   auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get<f::LoDTensor>();
-//   auto *rt = recv_tensor.data<float>();
-//   auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable<f::LoDTensor>();
-//   result_tensor->Resize(kDims);
-//   auto *ct = result_tensor->mutable_data<float>(cpu_place);
-
-//   paddle::memory::Copy(
-//       cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt,
-//       recv_tensor.numel() * sizeof(float),
-//       static_cast<p::CUDADeviceContext *>(dev_ctxs_[idx])->stream());
-
-//   for (int64_t j = 0; j < f::product(kDims); ++j) {
-//     ASSERT_NEAR(ct[j], result, 1e-5);
-//   }
-// }
+// TODO(helin): https://github.com/PaddlePaddle/Paddle/issues/9540
+/*
+TEST_F(NCCLTester, ncclBcastOp) {
+  std::unique_ptr<f::OpDesc> op2(new f::OpDesc);
+  const int kRoot = 0;
+  op2->SetType("ncclBcast");
+  op2->SetInput("X", {"st"});
+  op2->SetInput("Communicator", {"comm"});
+  op2->SetOutput("Out", {"rt"});
+  op2->SetAttr("root", kRoot);
+
+  std::vector<f::Scope *> dev_scopes;
+
+  std::vector<std::thread> ths;
+
+  for (size_t i = 0; i < gpu_list_.size(); ++i) {
+    dev_scopes.emplace_back(&g_scope_.NewScope());
+    std::thread th(&NCCLTester::PerThreadProgram<float>, this, gpu_list_[i],
+                   *op2.get(), dev_scopes[i]);
+    ths.emplace_back(std::move(th));
+  }
+
+  for (size_t i = 0; i < gpu_list_.size(); ++i) {
+    ths[i].join();
+  }
+
+  const int idx = 1;
+  float result = GetGPUData(kRoot);
+
+  p::CPUPlace cpu_place;
+  p::CUDAPlace gpu_place(gpu_list_[idx]);
+
+  auto &recv_tensor = dev_scopes[idx]->FindVar("rt")->Get<f::LoDTensor>();
+  auto *rt = recv_tensor.data<float>();
+  auto *result_tensor = dev_scopes[idx]->Var("ct")->GetMutable<f::LoDTensor>();
+  result_tensor->Resize(kDims);
+  auto *ct = result_tensor->mutable_data<float>(cpu_place);
+
+  paddle::memory::Copy(
+      cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt,
+      recv_tensor.numel() * sizeof(float),
+      static_cast<p::CUDADeviceContext *>(dev_ctxs_[idx])->stream());
+
+  for (int64_t j = 0; j < f::product(kDims); ++j) {
+    ASSERT_NEAR(ct[j], result, 1e-5);
+  }
+}
+*/
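
An alternative way to park a test like this, not used in this commit, is GoogleTest's built-in DISABLED_ prefix: the test still compiles but is skipped in normal runs, and it can be executed on demand by passing --gtest_also_run_disabled_tests to the test binary. A minimal sketch of that pattern follows; the fixture name BcastSketchTest is a hypothetical stand-in (the real test uses the NCCLTester fixture above), and the binary is assumed to link against gtest_main.

#include <gtest/gtest.h>

// Hypothetical stand-in fixture, for illustration only; the real test relies
// on the NCCLTester fixture defined in nccl_op_test.cu.cc.
class BcastSketchTest : public ::testing::Test {};

// The DISABLED_ prefix keeps the test compiled but excluded from normal runs.
// Run it explicitly by passing --gtest_also_run_disabled_tests to the binary.
TEST_F(BcastSketchTest, DISABLED_ncclBcastOp) {
  SUCCEED();  // placeholder body; the real body is the code shown in the diff
}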