未验证 提交 32633c8e 编写于 作者: R Ruibiao Chen 提交者: GitHub

SetDeviceId in StreamSafeCUDAAllocation (#49080)

上级 439b2b94
......@@ -16,6 +16,7 @@
#include <thread>
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/device/gpu/cuda/cuda_graph.h"
......@@ -43,6 +44,9 @@ void StreamSafeCUDAAllocation::RecordStream(gpuStream_t stream) {
return;
}
std::call_once(once_flag_,
[this] { phi::backends::gpu::SetDeviceId(place_.device); });
std::lock_guard<SpinLock> lock_guard(outstanding_event_map_lock_);
#ifdef PADDLE_WITH_CUDA
if (UNLIKELY(platform::CUDAGraph::IsThisThreadCapturing())) {
......@@ -63,6 +67,9 @@ bool StreamSafeCUDAAllocation::CanBeFreed() {
}
#endif
std::call_once(once_flag_,
[this] { phi::backends::gpu::SetDeviceId(place_.device); });
RecordGraphCapturingStreams();
for (auto it = outstanding_event_map_.begin();
......@@ -259,6 +266,8 @@ uint64_t StreamSafeCUDAAllocator::ProcessUnfreedAllocationsAndRelease() {
return underlying_allocator_->Release(place_);
}
// Out-of-class definition of the static flag that is passed to
// std::call_once around phi::backends::gpu::SetDeviceId(place_.device)
// (see RecordStream / CanBeFreed).
// NOTE(review): because this flag is a static member, it is shared by every
// StreamSafeCUDAAllocation instance, so the guarded SetDeviceId call completes
// at most once in total, using the device of whichever allocation reaches it
// first. Confirm this is intended when several devices or threads are in use.
std::once_flag StreamSafeCUDAAllocation::once_flag_;
// Out-of-class definition of the static map from a Place to the
// StreamSafeCUDAAllocator pointers registered under it.
std::map<platform::Place, std::vector<StreamSafeCUDAAllocator*>>
StreamSafeCUDAAllocator::allocator_map_;
// Out-of-class definition of the static SpinLock; presumably guards
// allocator_map_ -- TODO confirm against the code that acquires it.
SpinLock StreamSafeCUDAAllocator::allocator_map_lock_;
......
......@@ -45,6 +45,7 @@ class StreamSafeCUDAAllocation : public Allocation {
gpuStream_t GetOwningStream() const;
private:
static std::once_flag once_flag_;
void RecordGraphCapturingStreams();
void RecordStreamWithNoGraphCapturing(gpuStream_t stream);
DecoratedAllocationPtr underlying_allocation_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册