From e450823b8b4542d16e8bc188c5a5c1d9fbe41538 Mon Sep 17 00:00:00 2001 From: WangXi Date: Thu, 22 Oct 2020 10:01:32 +0800 Subject: [PATCH] Fix nccl op test failed, test=develop (#28172) --- .../fluid/operators/nccl/nccl_op_test.cu.cc | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/operators/nccl/nccl_op_test.cu.cc b/paddle/fluid/operators/nccl/nccl_op_test.cu.cc index 216a277938..6c7fba8d4a 100644 --- a/paddle/fluid/operators/nccl/nccl_op_test.cu.cc +++ b/paddle/fluid/operators/nccl/nccl_op_test.cu.cc @@ -174,10 +174,11 @@ void NCCLTester::testNcclAllReduceOp() { result_tensor->Resize(kDims); auto *ct = result_tensor->mutable_data<float>(cpu_place); - paddle::memory::Copy( - cpu_place, ct, p::CUDAPlace(gpu_list_[i]), rt, - recv_tensor.numel() * sizeof(float), - static_cast<p::CUDADeviceContext *>(dev_ctxs_[i])->stream()); + auto *dev_ctx = static_cast<p::CUDADeviceContext *>(dev_ctxs_[i]); + paddle::memory::Copy(cpu_place, ct, p::CUDAPlace(gpu_list_[i]), rt, + recv_tensor.numel() * sizeof(float), + dev_ctx->stream()); + dev_ctx->Wait(); for (int64_t j = 0; j < f::product(kDims); ++j) { ASSERT_NEAR(ct[j], expected_result, 1e-5); @@ -272,10 +273,10 @@ void NCCLTester::testNcclBcastOp() { result_tensor->Resize(kDims); auto *ct = result_tensor->mutable_data<float>(cpu_place); - paddle::memory::Copy( - cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt, - recv_tensor.numel() * sizeof(float), - static_cast<p::CUDADeviceContext *>(dev_ctxs_[idx])->stream()); + auto *dev_ctx = static_cast<p::CUDADeviceContext *>(dev_ctxs_[idx]); + paddle::memory::Copy(cpu_place, ct, p::CUDAPlace(gpu_list_[idx]), rt, + recv_tensor.numel() * sizeof(float), dev_ctx->stream()); + dev_ctx->Wait(); for (int64_t j = 0; j < f::product(kDims); ++j) { ASSERT_NEAR(ct[j], result, 1e-5); @@ -288,13 +289,9 @@ TEST_F(NCCLTester, ncclInitOp) {} TEST_F(NCCLTester, ncclOp) { // Serial execution is required for the same nccl comm. 
- // ncclAllReduceOp with desc - // TODO(helin): https://github.com/PaddlePaddle/Paddle/issues/9367 testNcclReduceOp(); testNcclAllReduceOp(); - // ncclBcastOp with desc - // TODO(helin): https://github.com/PaddlePaddle/Paddle/issues/9540 testNcclBcastOp(); } -- GitLab