未验证 提交 066097e8 编写于 作者: R ronnywang 提交者: GitHub

Wait for the stream before memcpy_d2h_sync in tensor.numpy (#56189)

上级 2386db87
...@@ -272,6 +272,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, ...@@ -272,6 +272,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
gpuMemcpyKind kind = cudaMemcpyDeviceToHost; gpuMemcpyKind kind = cudaMemcpyDeviceToHost;
#elif defined(PADDLE_WITH_HIP) #elif defined(PADDLE_WITH_HIP)
gpuMemcpyKind kind = hipMemcpyDeviceToHost; gpuMemcpyKind kind = hipMemcpyDeviceToHost;
phi::DeviceContextPool::Instance().Get(self->tensor.place())->Wait();
#endif #endif
if (self->tensor.is_selected_rows()) { if (self->tensor.is_selected_rows()) {
VLOG(6) << "Getting SelectedRows's numpy value"; VLOG(6) << "Getting SelectedRows's numpy value";
...@@ -341,6 +342,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, ...@@ -341,6 +342,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
#ifdef PADDLE_WITH_CUSTOM_DEVICE #ifdef PADDLE_WITH_CUSTOM_DEVICE
} else if (self->tensor.is_custom_device()) { } else if (self->tensor.is_custom_device()) {
eager_gil_scoped_release guard; eager_gil_scoped_release guard;
phi::DeviceContextPool::Instance().Get(self->tensor.place())->Wait();
if (self->tensor.is_selected_rows()) { if (self->tensor.is_selected_rows()) {
VLOG(6) << "Getting SelectedRows's numpy value"; VLOG(6) << "Getting SelectedRows's numpy value";
auto* selected_rows = auto* selected_rows =
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册