diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc
index fda233b3a016b5df6d9945ebf8cf7a749e7801a2..76edb3910cccedf761721e7ca34e49fc1054e59b 100644
--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -22,10 +22,12 @@ limitations under the License. */
 #else
 #include "paddle/fluid/platform/dynload/cudnn.h"
 #endif
+#include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/lock_guard_ptr.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/fluid/platform/monitor.h"
+#include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/split.h"
 
 DECLARE_double(fraction_of_gpu_memory_to_use);
@@ -630,5 +632,12 @@ bool IsCudaMallocRecorded(int dev_id) {
   return RecordedCudaMallocHelper::Instance(dev_id)->NeedRecord();
 }
 
+void EmptyCache(void) {
+  std::vector<int> devices = GetSelectedDevices();
+  for (auto device : devices) {
+    memory::Release(CUDAPlace(device));
+  }
+}
+
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/fluid/platform/gpu_info.h b/paddle/fluid/platform/gpu_info.h
index b5800ef0838859bc4a49f99f82e4163599b0ecd9..ef7f93a61dbfb36ee4db5ee2f48879815414ecbc 100644
--- a/paddle/fluid/platform/gpu_info.h
+++ b/paddle/fluid/platform/gpu_info.h
@@ -137,6 +137,9 @@ uint64_t RecordedCudaMallocSize(int dev_id);
 
 bool IsCudaMallocRecorded(int dev_id);
 
+//! Empty idle cached memory held by the allocator.
+void EmptyCache(void);
+
 }  // namespace platform
 }  // namespace paddle
 
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index b0148e50afc5483ffd706858345bb4b3e4a964f8..f797ed5142c3dcfde11c79f340d023b16f1ec141 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -2254,6 +2254,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("op_support_gpu", OpSupportGPU);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
+  m.def("cuda_empty_cache", platform::EmptyCache);
 
 #if !defined(PADDLE_WITH_HIP) && !defined(_WIN32)
   m.def("nvprof_init", platform::CudaProfilerInit);
diff --git a/python/paddle/device/cuda/__init__.py b/python/paddle/device/cuda/__init__.py
index 834cda71fdc5f14e1d7defa70641dafaf35a4ae4..be2e2488a304917e91dae47dc76c91e56ab2e010 100644
--- a/python/paddle/device/cuda/__init__.py
+++ b/python/paddle/device/cuda/__init__.py
@@ -23,6 +23,7 @@ __all__ = [
     'current_stream',
     'synchronize',
     'device_count',
+    'empty_cache',
 ]
 
 
@@ -117,3 +118,26 @@ def device_count():
         core, 'get_cuda_device_count') else 0
 
     return num_gpus
+
+
+def empty_cache():
+    """
+    Releases idle cached memory held by the allocator so that it can be used by other GPU
+    applications and becomes visible in `nvidia-smi`. In most cases you don't need to use
+    this function, because Paddle does not release memory back to the OS when you delete
+    Tensors on the GPU; it keeps GPU memory in a pool so that later allocations are faster.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            # required: gpu
+            paddle.set_device("gpu")
+            tensor = paddle.randn([512, 512, 512], "float32")
+            del tensor
+            paddle.device.cuda.empty_cache()
+    """
+
+    if core.is_compiled_with_cuda():
+        core.cuda_empty_cache()
diff --git a/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py
new file mode 100644
index 0000000000000000000000000000000000000000..4aefb234bbfc144304c1687b4dd21387bb91a2a9
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import unittest
+
+
+class TestEmptyCache(unittest.TestCase):
+    def test_empty_cache(self):
+        x = paddle.randn((2, 10, 12)).astype('float32')
+        del x
+        self.assertIsNone(paddle.device.cuda.empty_cache())
+
+
+if __name__ == '__main__':
+    unittest.main()
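
A minimal usage sketch of the new API, for reference only and not part of the diff. It assumes a GPU build of Paddle and uses `paddle.device.cuda.empty_cache` as bound above, together with the existing `paddle.device.cuda.device_count` and `paddle.set_device`:

    import paddle

    # Skip on CPU-only builds; empty_cache() is also a no-op there,
    # since the binding is guarded by core.is_compiled_with_cuda().
    if paddle.device.cuda.device_count() > 0:
        paddle.set_device("gpu")
        x = paddle.randn([512, 512, 512], dtype="float32")
        del x  # the freed blocks stay in the allocator's pool
        # Return idle cached blocks to the device so the memory
        # shows up as free in `nvidia-smi`.
        paddle.device.cuda.empty_cache()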