未验证 提交 066097e8 编写于 作者: R ronnywang 提交者: GitHub

Wait for the stream before memcpy_d2h_sync in tensor.numpy (#56189)

上级 2386db87
......@@ -272,6 +272,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
gpuMemcpyKind kind = cudaMemcpyDeviceToHost;
#elif defined(PADDLE_WITH_HIP)
gpuMemcpyKind kind = hipMemcpyDeviceToHost;
phi::DeviceContextPool::Instance().Get(self->tensor.place())->Wait();
#endif
if (self->tensor.is_selected_rows()) {
VLOG(6) << "Getting SelectedRows's numpy value";
......@@ -341,6 +342,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
#ifdef PADDLE_WITH_CUSTOM_DEVICE
} else if (self->tensor.is_custom_device()) {
eager_gil_scoped_release guard;
phi::DeviceContextPool::Instance().Get(self->tensor.place())->Wait();
if (self->tensor.is_selected_rows()) {
VLOG(6) << "Getting SelectedRows's numpy value";
auto* selected_rows =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册