From 387f2276009a815b0d96fc7920fd31846fe71c19 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Mon, 31 May 2021 18:59:38 +0800 Subject: [PATCH] [NPU] refine npu data_device_transform (#33224) --- paddle/fluid/framework/data_device_transform.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/paddle/fluid/framework/data_device_transform.cc b/paddle/fluid/framework/data_device_transform.cc index 7d005c9690..f447a00f37 100644 --- a/paddle/fluid/framework/data_device_transform.cc +++ b/paddle/fluid/framework/data_device_transform.cc @@ -26,6 +26,13 @@ void TransDataDevice(const Tensor &in, const platform::Place &dst_place, platform::errors::Unavailable("Currently, model parallelism is only " "supported between CPU and CUDA.")); + // NOTE(zhiqiu): Special case for CPU->NPU, avoid stream sync. + if (platform::is_cpu_place(in.place()) && platform::is_npu_place(dst_place)) { + TensorCopy(in, dst_place, + *platform::DeviceContextPool::Instance().Get(dst_place), out); + return; + } + // NOTE(yy): TransDataDevice should wait for computation of input. if (!platform::is_cuda_pinned_place(in.place())) { platform::DeviceContextPool::Instance().Get(in.place())->Wait(); -- GitLab