Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
1b0c7d7c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
1b0c7d7c
编写于
12月 15, 2017
作者:
Y
Yu Yang
提交者:
GitHub
12月 15, 2017
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Simplize system_allocator and fix GPU_INFO (#6653)
上级
c13805e9
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
23 additions
and
46 deletions
+23
-46
paddle/memory/detail/system_allocator.cc
paddle/memory/detail/system_allocator.cc
+13
-37
paddle/platform/gpu_info.cc
paddle/platform/gpu_info.cc
+10
-9
未找到文件。
paddle/memory/detail/system_allocator.cc
浏览文件 @
1b0c7d7c
...
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include <stdlib.h> // for malloc and free
#include <sys/mman.h> // for mlock and munlock
#include <algorithm> // for std::max
#include "gflags/gflags.h"
...
...
@@ -28,7 +29,7 @@ limitations under the License. */
// of memory available to the system for paging. So, by default, we
// should set false to use_pinned_memory.
DEFINE_bool
(
use_pinned_memory
,
true
,
"If set, allocate cpu pinned memory."
);
DECLARE_double
(
fraction_of_gpu_memory_to_use
);
namespace
paddle
{
namespace
memory
{
namespace
detail
{
...
...
@@ -77,45 +78,20 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) {
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
// if size is 0. We just make sure it does.
if
(
size
<=
0
)
return
nullptr
;
size_t
available
=
0
;
size_t
capacity
=
0
;
paddle
::
platform
::
GpuMemoryUsage
(
available
,
capacity
);
// Reserve memory for page tables, etc.
size_t
reserving
=
0.05
*
capacity
+
paddle
::
platform
::
GpuMinChunkSize
();
size_t
usable
=
available
>
reserving
?
available
-
reserving
:
0
;
// If remaining size no less than expected size, using general
// cudaMalloc to allocate GPU memory.
void
*
p
=
0
;
if
(
size
<=
usable
)
{
cudaError_t
result
=
cudaMalloc
(
&
p
,
size
);
if
(
result
==
cudaSuccess
)
{
index
=
0
;
gpu_alloc_size_
+=
size
;
return
p
;
}
}
// If remaining size less than expected size or cudaMalloc failed,
// cudaMallocHost will be considered as a fallback allocator.
//
// NOTE: here, we use GpuMaxAllocSize() as the maximum memory size
// of host fallback allocation. Allocates too much would reduce
// the amount of memory available to the underlying system for paging.
usable
=
paddle
::
platform
::
GpuMaxAllocSize
()
-
fallback_alloc_size_
;
if
(
size
>
usable
)
return
nullptr
;
cudaError_t
result
=
cudaMallocHost
(
&
p
,
size
);
void
*
p
;
cudaError_t
result
=
cudaMalloc
(
&
p
,
size
);
if
(
result
==
cudaSuccess
)
{
index
=
1
;
fallback_alloc_size_
+=
size
;
index
=
0
;
gpu_alloc_size_
+=
size
;
return
p
;
}
else
{
LOG
(
WARNING
)
<<
"Cannot malloc "
<<
size
/
1024.0
/
1024.0
<<
" MB GPU memory. Please shrink FLAGS_fraction_of_gpu_memory_to_use "
"environment variable to a lower value. Current value is "
<<
FLAGS_fraction_of_gpu_memory_to_use
;
return
nullptr
;
}
return
nullptr
;
}
void
GPUAllocator
::
Free
(
void
*
p
,
size_t
size
,
size_t
index
)
{
...
...
paddle/platform/gpu_info.cc
浏览文件 @
1b0c7d7c
...
...
@@ -73,19 +73,20 @@ size_t GpuMaxChunkSize() {
size_t
available
=
0
;
GpuMemoryUsage
(
available
,
total
);
// Reserving the rest memory for page tables, etc.
size_t
reserving
=
0.05
*
total
;
VLOG
(
10
)
<<
"GPU Usage "
<<
available
/
1024
/
1024
<<
"M/"
<<
total
/
1024
/
1024
<<
"M"
;
size_t
reserving
=
static_cast
<
size_t
>
(
0.05
*
total
);
// If available less than minimum chunk size, no usable memory exists.
available
=
std
::
max
(
std
::
max
(
available
,
GpuMinChunkSize
())
-
GpuMinChunkSize
(),
reserving
)
-
reserving
;
std
::
min
(
std
::
max
(
available
,
GpuMinChunkSize
())
-
GpuMinChunkSize
(),
total
-
reserving
);
// Reserving the rest memory for page tables, etc.
size_t
allocating
=
FLAGS_fraction_of_gpu_memory_to_use
*
total
;
size_t
allocating
=
static_cast
<
size_t
>
(
FLAGS_fraction_of_gpu_memory_to_use
*
(
total
-
reserving
));
PADDLE_ENFORCE_LT
(
allocating
,
available
);
PADDLE_ENFORCE_LE
(
allocating
,
available
);
return
allocating
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录