Wait for the stream before memcpy_d2h_sync in tensor.numpy (#56189)

066097e8 · ronnywang · GitHub · 2386db87 · 066097e8
显示空白变更内容
内联并排

Showing 1 changed file with 2 additions and 0 deletions

paddle/fluid/pybind/eager_method.cc paddle/fluid/pybind/eager_method.cc +2 -0

未找到文件。
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -272,6 +272,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
    gpuMemcpyKind kind = cudaMemcpyDeviceToHost;
 #elif defined(PADDLE_WITH_HIP)
    gpuMemcpyKind kind = hipMemcpyDeviceToHost;
+    phi::DeviceContextPool::Instance().Get(self->tensor.place())->Wait();
 #endif
    if (self->tensor.is_selected_rows()) {
      VLOG(6) << "Getting SelectedRows's numpy value";
@@ -341,6 +342,7 @@ static PyObject* tensor_method_numpy(TensorObject* self,
 #ifdef PADDLE_WITH_CUSTOM_DEVICE
  } else if (self->tensor.is_custom_device()) {
    eager_gil_scoped_release guard;
+    phi::DeviceContextPool::Instance().Get(self->tensor.place())->Wait();
    if (self->tensor.is_selected_rows()) {
      VLOG(6) << "Getting SelectedRows's numpy value";
      auto* selected_rows =