未验证 提交 8603b5fb 编写于 作者: L liuwei1031 提交者: GitHub

fix randomly hang issue of PaddleDetection training task on windows (#24977)

上级 640196c4
...@@ -32,6 +32,18 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst, ...@@ -32,6 +32,18 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
static constexpr size_t kMaxGpuAsyncCopyBytes = 64 * 1024; // 64K static constexpr size_t kMaxGpuAsyncCopyBytes = 64 * 1024; // 64K
// Waits on the host for the default CUDA stream (stream 0).
//
// Non-Windows builds issue one blocking cudaStreamSynchronize(0) call.
// Windows builds poll the stream with cudaStreamQuery(0) instead: the loop
// keeps spinning while the query reports cudaErrorNotReady and stops as soon
// as it reports cudaSuccess or any other status.
//
// The final status code is discarded on both paths, so a failure is not
// reported to the caller.
// NOTE(review): the polling variant is presumably a workaround for a hang
// seen with the blocking call on Windows -- confirm against the commit log.
inline void SyncCUDAStream() {
#if defined(_WIN32)
  cudaError_t status;
  do {
    // Busy-wait: re-query until the stream is no longer "not ready".
    status = cudaStreamQuery(0);
  } while (status == cudaErrorNotReady);
#else
  cudaStreamSynchronize(0);
#endif
}
// NOTE(zcd): Avoid GpuMemcpySync whenever possible,
// because GpuMemcpySync issues the copy command to the default stream,
// which prevents two commands from different streams from running concurrently.
...@@ -55,7 +67,7 @@ void Copy<platform::CPUPlace, platform::CUDAPlace>( ...@@ -55,7 +67,7 @@ void Copy<platform::CPUPlace, platform::CUDAPlace>(
platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost); platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost);
// FIXME(zjl): do we really need it? // FIXME(zjl): do we really need it?
if (num <= kMaxGpuAsyncCopyBytes) { if (num <= kMaxGpuAsyncCopyBytes) {
cudaStreamSynchronize(0); SyncCUDAStream();
} }
} }
} }
...@@ -77,7 +89,7 @@ void Copy<platform::CUDAPlace, platform::CPUPlace>( ...@@ -77,7 +89,7 @@ void Copy<platform::CUDAPlace, platform::CPUPlace>(
platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice); platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice);
// FIXME(zjl): do we really need it? // FIXME(zjl): do we really need it?
if (num <= kMaxGpuAsyncCopyBytes) { if (num <= kMaxGpuAsyncCopyBytes) {
cudaStreamSynchronize(0); SyncCUDAStream();
} }
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册