diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 4a9dbd324c90380e784cc9457845fabd858585be..c9cad15a74bdf1077d4103c5874a2c4556b73e65 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -190,6 +190,7 @@ void PyCUDATensorSetFromArray( static_cast(pool.Get(place)); paddle::platform::GpuMemcpyAsync(dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice, dev_ctx->stream()); + dev_ctx->Wait(); } template <>