diff --git a/paddle/fluid/framework/data_device_transform.cc b/paddle/fluid/framework/data_device_transform.cc index 85dbb39e6fba735471446b5e5e71a612282c498a..a876725ac0f17838458065c4b4753a03e2812801 100644 --- a/paddle/fluid/framework/data_device_transform.cc +++ b/paddle/fluid/framework/data_device_transform.cc @@ -36,9 +36,11 @@ void TransDataDevice(const Tensor& in, const platform::Place& dst_place, VLOG(3) << "DeviceTransform in, src_place " << in.place() << " dst_place: " << dst_place; auto* dev_ctx = GetDeviceContext(in.place(), dst_place); - dev_ctx->Wait(); + TensorCopy(in, dst_place, *dev_ctx, out); - dev_ctx->Wait(); + if (platform::is_gpu_place(in.place()) && platform::is_cpu_place(dst_place)) { + dev_ctx->Wait(); + } } } // namespace framework