Unverified commit 83932715 authored by chenenquan, committed by GitHub

Add API paddle.device.cuda.empty_cache to release idle GPU memory held by the allocator (#35427)

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Fix test coverage problem for empty_cache

* Delete redundant check for empty_cache

* Fix the empty_cache doc

* Delete the nvidia-smi comment in the empty_cache doc, test=document_fix
Parent 657a8c8f
......@@ -22,10 +22,12 @@ limitations under the License. */
#else
#include "paddle/fluid/platform/dynload/cudnn.h"
#endif
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/monitor.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
DECLARE_double(fraction_of_gpu_memory_to_use);
......@@ -630,5 +632,12 @@ bool IsCudaMallocRecorded(int dev_id) {
  return RecordedCudaMallocHelper::Instance(dev_id)->NeedRecord();
}
void EmptyCache(void) {
  std::vector<int> devices = GetSelectedDevices();
  for (auto device : devices) {
    // Return idle cached blocks on each selected device to the CUDA driver.
    memory::Release(CUDAPlace(device));
  }
}
} // namespace platform
} // namespace paddle
......@@ -137,6 +137,9 @@ uint64_t RecordedCudaMallocSize(int dev_id);
bool IsCudaMallocRecorded(int dev_id);
//! Empty idle cached memory held by the allocator.
void EmptyCache(void);
} // namespace platform
} // namespace paddle
......
......@@ -2254,6 +2254,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("op_support_gpu", OpSupportGPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
m.def("cuda_empty_cache", platform::EmptyCache);
#if !defined(PADDLE_WITH_HIP) && !defined(_WIN32)
m.def("nvprof_init", platform::CudaProfilerInit);
......
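The binding above exposes platform::EmptyCache to Python as core.cuda_empty_cache. A minimal sketch of the call path, for illustration only (the guarded public wrapper added below is the intended entry point):

from paddle.fluid import core

# Illustrative only: prefer paddle.device.cuda.empty_cache(), which wraps this
# call behind the same is_compiled_with_cuda() guard.
if core.is_compiled_with_cuda():
    core.cuda_empty_cache()  # release idle cached blocks on all selected GPUs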
......@@ -23,6 +23,7 @@ __all__ = [
    'current_stream',
    'synchronize',
    'device_count',
    'empty_cache',
]
......@@ -117,3 +118,26 @@ def device_count():
        core, 'get_cuda_device_count') else 0
    return num_gpus
def empty_cache():
    """
    Releases idle cached memory held by the allocator so that it can be used by
    other GPU applications and becomes visible in `nvidia-smi`. In most cases
    you do not need to call this function: Paddle does not release memory back
    to the OS when you delete Tensors on the GPU, because it keeps GPU memory
    in a pool so that subsequent allocations can be served much faster.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle

            paddle.set_device("gpu")
            tensor = paddle.randn([512, 512, 512], "float32")
            del tensor
            paddle.device.cuda.empty_cache()
    """
    if core.is_compiled_with_cuda():
        core.cuda_empty_cache()
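A hedged sketch of how the release can be observed, assuming paddle.device.cuda.memory_reserved is available in your Paddle build (it is not part of this change):

import paddle

paddle.set_device("gpu")
x = paddle.randn([256, 256, 256], dtype="float32")
del x
# Freed blocks stay in Paddle's pool until empty_cache() hands them back to
# the driver; memory_reserved (if present) makes the release visible.
before = paddle.device.cuda.memory_reserved()
paddle.device.cuda.empty_cache()
after = paddle.device.cuda.memory_reserved()
print("reserved before/after:", before, after)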
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import unittest


class TestEmptyCache(unittest.TestCase):
    def test_empty_cache(self):
        x = paddle.randn((2, 10, 12)).astype('float32')
        del x
        # empty_cache() is safe on any build (no-op without CUDA) and
        # returns None.
        self.assertIsNone(paddle.device.cuda.empty_cache())


if __name__ == '__main__':
    unittest.main()
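A possible GPU-guarded variant of this test, sketched under the assumption that skipping cleanly on CPU-only builds is preferred over relying on the wrapper's internal guard (TestEmptyCacheGPU is a hypothetical name, not part of this change):

import unittest

import paddle
from paddle.fluid import core


class TestEmptyCacheGPU(unittest.TestCase):
    def test_empty_cache_on_gpu(self):
        # Skip on CPU-only builds; empty_cache() would be a no-op there anyway.
        if not core.is_compiled_with_cuda():
            self.skipTest("requires a CUDA build of Paddle")
        paddle.set_device("gpu")
        x = paddle.randn((2, 10, 12)).astype("float32")
        del x
        self.assertIsNone(paddle.device.cuda.empty_cache())


if __name__ == "__main__":
    unittest.main()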