diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc
index 0b14fc6a3fc4a0e4a58b3fac1aafe8ee5c65e8dc..001d5f4d6ae5fbf2b7947ce04bf0785c26498487 100644
--- a/paddle/fluid/memory/detail/system_allocator.cc
+++ b/paddle/fluid/memory/detail/system_allocator.cc
@@ -105,6 +105,21 @@ bool CPUAllocator::UseGpu() const { return false; }
 
 #ifdef PADDLE_WITH_CUDA
 
+static void ClearCUDAOutOfMemoryError(cudaError_t* status) {
+  if (*status == cudaErrorMemoryAllocation) {
+    *status = cudaSuccess;
+  }
+
+  PADDLE_ENFORCE_CUDA_SUCCESS(*status);
+
+  *status = cudaGetLastError();
+  if (*status == cudaErrorMemoryAllocation) {
+    *status = cudaSuccess;
+  }
+
+  PADDLE_ENFORCE_CUDA_SUCCESS(*status);
+}
+
 void* GPUAllocator::Alloc(size_t* index, size_t size) {
   // CUDA documentation doesn't explain if cudaMalloc returns nullptr
   // if size is 0. We just make sure it does.
@@ -120,19 +135,19 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
     gpu_alloc_size_ += size;
     return p;
   } else {
-    if (result == cudaErrorMemoryAllocation) {
-      result = cudaSuccess;
-    }
-    PADDLE_ENFORCE_CUDA_SUCCESS(result);
-
-    result = cudaGetLastError();
-    if (result == cudaErrorMemoryAllocation) {
-      result = cudaSuccess;
-    }
-    PADDLE_ENFORCE_CUDA_SUCCESS(result);
-
-    size_t avail, total;
-    platform::GpuMemoryUsage(&avail, &total);
+    ClearCUDAOutOfMemoryError(&result);
+
+    /**
+     * NOTE(zjl): Sometimes cudaMemGetInfo would raise OOM error
+     * if there is very little GPU memory left. In this case, we
+     * should consider the available GPU memory to be 0, and throw
+     * exception inside this function instead of throwing exception
+     * inside cudaMemGetInfo.
+     */
+    size_t avail = 0, total = 0;
+    result = cudaMemGetInfo(&avail, &total);
+    if (result != cudaSuccess) avail = 0;
+    ClearCUDAOutOfMemoryError(&result);
     PADDLE_THROW_BAD_ALLOC(
         "\n\nOut of memory error on GPU %d. "
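
For context, the pattern this diff factors into ClearCUDAOutOfMemoryError can be reproduced with the plain CUDA runtime API. Below is a minimal standalone sketch, not the Paddle implementation: fprintf/abort stands in for PADDLE_ENFORCE_CUDA_SUCCESS and PADDLE_THROW_BAD_ALLOC, and the helper names ClearOutOfMemoryError and TryGpuAlloc are illustrative only. The point it demonstrates is the same one the NOTE(zjl) comment makes: a failed cudaMalloc records cudaErrorMemoryAllocation as the current thread's last error, cudaGetLastError() must be called to read and reset that record, and cudaMemGetInfo() may itself fail when memory is nearly exhausted, in which case the available amount is treated as 0.

// Sketch of the OOM-clearing pattern (illustrative names, plain CUDA runtime API).
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Swallow an out-of-memory status, but treat any other CUDA error as fatal.
// Calling cudaGetLastError() afterwards both returns and resets the error
// recorded for the current thread, so later runtime calls start clean.
static void ClearOutOfMemoryError(cudaError_t* status) {
  if (*status == cudaErrorMemoryAllocation) {
    *status = cudaSuccess;
  }
  if (*status != cudaSuccess) {
    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(*status));
    abort();
  }
  *status = cudaGetLastError();
  if (*status == cudaErrorMemoryAllocation) {
    *status = cudaSuccess;
  }
  if (*status != cudaSuccess) {
    fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(*status));
    abort();
  }
}

// Attempt a device allocation; on OOM, report how much memory was free.
void* TryGpuAlloc(size_t size) {
  void* p = nullptr;
  cudaError_t result = cudaMalloc(&p, size);
  if (result == cudaSuccess) {
    return p;
  }

  ClearOutOfMemoryError(&result);

  // cudaMemGetInfo may itself fail when very little memory is left; in that
  // case report the available amount as 0 instead of propagating the error.
  size_t avail = 0, total = 0;
  result = cudaMemGetInfo(&avail, &total);
  if (result != cudaSuccess) avail = 0;
  ClearOutOfMemoryError(&result);

  fprintf(stderr,
          "Out of memory on GPU: requested %zu bytes, %zu of %zu bytes available\n",
          size, avail, total);
  return nullptr;
}

The error is cleared twice because the CUDA runtime keeps the latched per-thread error independently of the return value of each call: inspecting only the local status would leave the latched cudaErrorMemoryAllocation in place, where it could surface from an unrelated runtime call later. This is also why the diff replaces the direct platform::GpuMemoryUsage call with a checked cudaMemGetInfo followed by a second ClearCUDAOutOfMemoryError, so the bad-alloc exception is thrown from Alloc itself rather than from inside the memory query.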