diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc
index 514ac7883ad2effdf3518be8afe3f448a5ac10b2..2ecb44ff15fec23e9b2a0045a959a2f6ed8a0a8c 100644
--- a/paddle/fluid/memory/allocation/legacy_allocator.cc
+++ b/paddle/fluid/memory/allocation/legacy_allocator.cc
@@ -29,6 +29,9 @@
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/string/printf.h"
 #include "paddle/fluid/string/split.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cuda_device_guard.h"
+#endif
 
 DEFINE_bool(init_allocated_mem, false,
             "It is a mistake that the values of the memory allocated by "
@@ -142,7 +145,6 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
   std::call_once(init_flag, [gpu_id]() {
     devices = platform::GetSelectedDevices();
     int gpu_num = devices.size();
-    allocation::GPUMemMonitor.Initialize(devices.size());
 
     a_arr = new BuddyAllocator *[gpu_num];
 
@@ -168,9 +170,9 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
                << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
                << FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
     }
+    platform::SetDeviceId(gpu_id);
   });
 
-  platform::SetDeviceId(gpu_id);
   auto pos = std::distance(devices.begin(),
                            std::find(devices.begin(), devices.end(), gpu_id));
   return a_arr[pos];
@@ -193,8 +195,7 @@ void *Alloc(const platform::CUDAPlace &place,
   auto *buddy_allocator = GetGPUBuddyAllocator(place.device);
   auto *ptr = buddy_allocator->Alloc(size);
   if (ptr == nullptr) {
-    int cur_dev = platform::GetCurrentDeviceId();
-    platform::SetDeviceId(place.device);
+    platform::CUDADeviceGuard guard(place.device);
     size_t avail, total;
     platform::GpuMemoryUsage(&avail, &total);
     LOG(FATAL) << "Cannot allocate " << string::HumanReadableSize(size)
@@ -206,7 +207,6 @@ void *Alloc(const platform::CUDAPlace &place,
                << string::HumanReadableSize(buddy_allocator->GetMaxChunkSize())
                << "GPU memory used: "
                << string::HumanReadableSize(Used(place));
-    platform::SetDeviceId(cur_dev);
   } else {
     if (FLAGS_benchmark) {
       allocation::GPUMemMonitor.Add(place.device, size);
diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc
index 41d79c5beb1367907a401b572d3d0eaf3a8ac67b..b0f48c455caf4606a4af63b54b6510f33f68894d 100644
--- a/paddle/fluid/memory/detail/system_allocator.cc
+++ b/paddle/fluid/memory/detail/system_allocator.cc
@@ -29,6 +29,9 @@ limitations under the License. */
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/gpu_info.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cuda_device_guard.h"
+#endif
 
 DECLARE_bool(use_pinned_memory);
 DECLARE_double(fraction_of_gpu_memory_to_use);
@@ -104,18 +107,11 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
   // CUDA documentation doesn't explain if cudaMalloc returns nullptr
   // if size is 0. We just make sure it does.
   if (size <= 0) return nullptr;
-  void* p;
-  int prev_id;
-  cudaGetDevice(&prev_id);
-  if (prev_id != gpu_id_) {
-    cudaSetDevice(gpu_id_);
-  }
-  cudaError_t result = cudaMalloc(&p, size);
+  paddle::platform::CUDADeviceGuard guard(gpu_id_);
 
-  if (prev_id != gpu_id_) {
-    cudaSetDevice(prev_id);
-  }
+  void* p;
+  cudaError_t result = cudaMalloc(&p, size);
 
   if (result == cudaSuccess) {
     *index = 0;
@@ -140,7 +136,6 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
 
 void GPUAllocator::Free(void* p, size_t size, size_t index) {
   cudaError_t err;
-
   if (index == 0) {
     PADDLE_ASSERT(gpu_alloc_size_ >= size);
     gpu_alloc_size_ -= size;
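
The substance of the patch is replacing the hand-rolled save/set/restore device dance (cudaGetDevice / cudaSetDevice around each allocation) with a scope-based RAII guard. Below is a minimal sketch of what such a guard looks like, written against the raw CUDA runtime API so it stands alone; the name ScopedDeviceGuard is an illustrative stand-in, while the real class is platform::CUDADeviceGuard in paddle/fluid/platform/cuda_device_guard.h and wraps Paddle's own device-id helpers.

#include <cuda_runtime.h>

// Sketch of an RAII device guard: switch to the target device on
// construction, restore the previous device on destruction.
class ScopedDeviceGuard {
 public:
  explicit ScopedDeviceGuard(int dev_id) {
    int prev_id = -1;
    cudaGetDevice(&prev_id);
    if (prev_id != dev_id) {
      prev_id_ = prev_id;     // remember which device to restore
      cudaSetDevice(dev_id);  // switch for the lifetime of the guard
    }
  }
  ~ScopedDeviceGuard() {
    // Restore the previous device on every exit path from the scope.
    if (prev_id_ != -1) cudaSetDevice(prev_id_);
  }
  // Non-copyable: a copy would restore the saved device twice.
  ScopedDeviceGuard(const ScopedDeviceGuard&) = delete;
  ScopedDeviceGuard& operator=(const ScopedDeviceGuard&) = delete;

 private:
  int prev_id_{-1};  // -1 means no switch happened, nothing to restore
};

One detail worth noting: the guard only holds while the object is alive, so it must be bound to a named variable. A statement like platform::CUDADeviceGuard(place.device); would construct and immediately destroy a temporary, leaving the current device unchanged for the statements that follow; hence the named guard in the Alloc error path above.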