From 2133b45a14a4fb3e5a599fb786d9798e7c4d21fe Mon Sep 17 00:00:00 2001
From: Baibaifan <39549453+Baibaifan@users.noreply.github.com>
Date: Wed, 23 Jun 2021 18:06:31 +0800
Subject: [PATCH] add aclcheck to c_comm_init (#33739)

---
 .../operators/collective/c_comm_init_hccl_op.cc | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc b/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc
index 5f765d9544..3df0595525 100644
--- a/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc
+++ b/paddle/fluid/operators/collective/c_comm_init_hccl_op.cc
@@ -69,10 +69,12 @@ class CCommInitOpAscend : public framework::OperatorBase {
     for (int32_t idx = 0; idx < size; idx++) {
       input[idx] = 1.0;
     }
-    aclrtMalloc(reinterpret_cast<void**>(&buff), size * sizeof(float),
-                ACL_MEM_MALLOC_HUGE_FIRST);
-    aclrtMemcpy(reinterpret_cast<void*>(buff), size * sizeof(float),
-                input.data(), size * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE);
+    PADDLE_ENFORCE_NPU_SUCCESS(aclrtMalloc(reinterpret_cast<void**>(&buff),
+                                           size * sizeof(float),
+                                           ACL_MEM_MALLOC_HUGE_FIRST));
+    PADDLE_ENFORCE_NPU_SUCCESS(aclrtMemcpy(
+        reinterpret_cast<void*>(buff), size * sizeof(float), input.data(),
+        size * sizeof(float), ACL_MEMCPY_HOST_TO_DEVICE));
     VLOG(3) << "Build buff data successful.";
 
     aclrtStream stream = nullptr;
@@ -83,8 +85,8 @@ class CCommInitOpAscend : public framework::OperatorBase {
       auto dev_ctx = platform::DeviceContextPool::Instance().Get(place);
       stream = static_cast<platform::NPUDeviceContext*>(dev_ctx)->stream();
     }
-    platform::dynload::HcclBroadcast(buff, size, HCCL_DATA_TYPE_FP32, 0,
-                                     comm->comm(), stream);
+    PADDLE_ENFORCE_NPU_SUCCESS(platform::dynload::HcclBroadcast(
+        buff, size, HCCL_DATA_TYPE_FP32, 0, comm->comm(), stream));
     VLOG(3) << "Build connection successful.";
 #else
     PADDLE_THROW(platform::errors::PreconditionNotMet(
-- 
GitLab