From 6a56bce7b45dfa56cc595417e89f417465aa6a8c Mon Sep 17 00:00:00 2001 From: Roc <30228238+sljlp@users.noreply.github.com> Date: Sun, 15 Jan 2023 12:52:18 +0800 Subject: [PATCH] support mp on xpu (#49815) 1 update xccl lib 2 when using comm_ctx, the allocator should be set manually. --- cmake/external/xpu.cmake | 2 +- paddle/fluid/distributed/collective/process_group_bkcl.cc | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index ab278dd5e9..be088de898 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -16,7 +16,7 @@ else() endif() set(XPU_XCCL_BASE_URL - "https://klx-sdk-release-public.su.bcebos.com/xccl/release/1.0.0") + "https://klx-sdk-release-public.su.bcebos.com/xccl/release/1.0.6") if(WITH_AARCH64) set(XPU_XRE_DIR_NAME "xre-kylin_aarch64") diff --git a/paddle/fluid/distributed/collective/process_group_bkcl.cc b/paddle/fluid/distributed/collective/process_group_bkcl.cc index 9c479ee054..5a8d4bca1c 100644 --- a/paddle/fluid/distributed/collective/process_group_bkcl.cc +++ b/paddle/fluid/distributed/collective/process_group_bkcl.cc @@ -128,6 +128,11 @@ void ProcessGroupBKCL::CreateBKCLEnvCache(const Place& place, platform::DeviceContextPool::Instance().Get(place)); // must use XPUDeviceContext here to make sure XPUContext::Init() is called auto comm_ctx = std::make_unique<XPUDeviceContext>(place); + // set allocator + comm_ctx->SetAllocator(memory::allocation::AllocatorFacade::Instance() + .GetAllocator(place) + .get()); + BKCLContext_t bkcl_comm; BKCLCHECK(bkcl_init_rank(&bkcl_comm, GetRank(), GetSize(), &bkcl_id)); comm_ctx->SetBkclContext(bkcl_comm); -- GitLab