未验证 · 提交 32633c8e · 作者: Ruibiao Chen · 提交者: GitHub

SetDeviceId in StreamSafeCUDAAllocation (#49080)

上级 439b2b94
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <thread> #include <thread>
#include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/device/gpu/cuda/cuda_graph.h" #include "paddle/fluid/platform/device/gpu/cuda/cuda_graph.h"
...@@ -43,6 +44,9 @@ void StreamSafeCUDAAllocation::RecordStream(gpuStream_t stream) { ...@@ -43,6 +44,9 @@ void StreamSafeCUDAAllocation::RecordStream(gpuStream_t stream) {
return; return;
} }
std::call_once(once_flag_,
[this] { phi::backends::gpu::SetDeviceId(place_.device); });
std::lock_guard<SpinLock> lock_guard(outstanding_event_map_lock_); std::lock_guard<SpinLock> lock_guard(outstanding_event_map_lock_);
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
if (UNLIKELY(platform::CUDAGraph::IsThisThreadCapturing())) { if (UNLIKELY(platform::CUDAGraph::IsThisThreadCapturing())) {
...@@ -63,6 +67,9 @@ bool StreamSafeCUDAAllocation::CanBeFreed() { ...@@ -63,6 +67,9 @@ bool StreamSafeCUDAAllocation::CanBeFreed() {
} }
#endif #endif
std::call_once(once_flag_,
[this] { phi::backends::gpu::SetDeviceId(place_.device); });
RecordGraphCapturingStreams(); RecordGraphCapturingStreams();
for (auto it = outstanding_event_map_.begin(); for (auto it = outstanding_event_map_.begin();
...@@ -259,6 +266,8 @@ uint64_t StreamSafeCUDAAllocator::ProcessUnfreedAllocationsAndRelease() { ...@@ -259,6 +266,8 @@ uint64_t StreamSafeCUDAAllocator::ProcessUnfreedAllocationsAndRelease() {
return underlying_allocator_->Release(place_); return underlying_allocator_->Release(place_);
} }
std::once_flag StreamSafeCUDAAllocation::once_flag_;
std::map<platform::Place, std::vector<StreamSafeCUDAAllocator*>> std::map<platform::Place, std::vector<StreamSafeCUDAAllocator*>>
StreamSafeCUDAAllocator::allocator_map_; StreamSafeCUDAAllocator::allocator_map_;
SpinLock StreamSafeCUDAAllocator::allocator_map_lock_; SpinLock StreamSafeCUDAAllocator::allocator_map_lock_;
......
...@@ -45,6 +45,7 @@ class StreamSafeCUDAAllocation : public Allocation { ...@@ -45,6 +45,7 @@ class StreamSafeCUDAAllocation : public Allocation {
gpuStream_t GetOwningStream() const; gpuStream_t GetOwningStream() const;
private: private:
static std::once_flag once_flag_;
void RecordGraphCapturingStreams(); void RecordGraphCapturingStreams();
void RecordStreamWithNoGraphCapturing(gpuStream_t stream); void RecordStreamWithNoGraphCapturing(gpuStream_t stream);
DecoratedAllocationPtr underlying_allocation_; DecoratedAllocationPtr underlying_allocation_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册