Unverified commit 83932715 authored by chenenquan, committed by GitHub

Add API paddle.device.cuda.empty_cache to release idle GPU memory held by the allocator (#35427)

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Add empty_cache API to release idle GPU memory held by the allocator, test=develop

* Fix test coverage problem for empty_cache

* Delete redundant check for empty_cache

* Fix the empty_cache doc

* Delete the nvidia-smi comment in the empty_cache doc, test=document_fix
Parent 657a8c8f
......@@ -22,10 +22,12 @@ limitations under the License. */
#else
#include "paddle/fluid/platform/dynload/cudnn.h"
#endif
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/lock_guard_ptr.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/monitor.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
DECLARE_double(fraction_of_gpu_memory_to_use);
......@@ -630,5 +632,12 @@ bool IsCudaMallocRecorded(int dev_id) {
  return RecordedCudaMallocHelper::Instance(dev_id)->NeedRecord();
}
void EmptyCache(void) {
  std::vector<int> devices = GetSelectedDevices();
  for (auto device : devices) {
    // Return idle cached blocks on each selected device to the CUDA driver.
    memory::Release(CUDAPlace(device));
  }
}
} // namespace platform
} // namespace paddle
......@@ -137,6 +137,9 @@ uint64_t RecordedCudaMallocSize(int dev_id);
bool IsCudaMallocRecorded(int dev_id);
//! Empty idle cached memory held by the allocator.
void EmptyCache(void);
} // namespace platform
} // namespace paddle
......
......@@ -2254,6 +2254,7 @@ All parameter, weight, gradient are variables in Paddle.
m.def("op_support_gpu", OpSupportGPU);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
m.def("cuda_empty_cache", platform::EmptyCache);
#if !defined(PADDLE_WITH_HIP) && !defined(_WIN32)
m.def("nvprof_init", platform::CudaProfilerInit);
......
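The binding above exposes platform::EmptyCache to Python as core.cuda_empty_cache. A minimal sketch of the call path, for illustration only (the guarded public wrapper added below is the intended entry point):

from paddle.fluid import core

# Illustrative only: prefer paddle.device.cuda.empty_cache(), which wraps this
# call behind the same is_compiled_with_cuda() guard.
if core.is_compiled_with_cuda():
    core.cuda_empty_cache()  # release idle cached blocks on all selected GPUs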
......@@ -23,6 +23,7 @@ __all__ = [
    'current_stream',
    'synchronize',
    'device_count',
    'empty_cache',
]
......@@ -117,3 +118,26 @@ def device_count():
        core, 'get_cuda_device_count') else 0
    return num_gpus
def empty_cache():
    """
    Releases idle cached memory held by the allocator so that it can be used by
    other GPU applications and becomes visible in `nvidia-smi`. In most cases
    you do not need to call this function: Paddle does not release memory back
    to the OS when you delete Tensors on the GPU, because it keeps GPU memory
    in a pool so that subsequent allocations can be served much faster.

    Examples:
        .. code-block:: python

            # required: gpu
            import paddle

            paddle.set_device("gpu")
            tensor = paddle.randn([512, 512, 512], "float32")
            del tensor
            paddle.device.cuda.empty_cache()
    """
    if core.is_compiled_with_cuda():
        core.cuda_empty_cache()
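A hedged sketch of how the release can be observed, assuming paddle.device.cuda.memory_reserved is available in your Paddle build (it is not part of this change):

import paddle

paddle.set_device("gpu")
x = paddle.randn([256, 256, 256], dtype="float32")
del x
# Freed blocks stay in Paddle's pool until empty_cache() hands them back to
# the driver; memory_reserved (if present) makes the release visible.
before = paddle.device.cuda.memory_reserved()
paddle.device.cuda.empty_cache()
after = paddle.device.cuda.memory_reserved()
print("reserved before/after:", before, after)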
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import unittest


class TestEmptyCache(unittest.TestCase):
    def test_empty_cache(self):
        x = paddle.randn((2, 10, 12)).astype('float32')
        del x
        # empty_cache() is safe on any build (no-op without CUDA) and
        # returns None.
        self.assertIsNone(paddle.device.cuda.empty_cache())


if __name__ == '__main__':
    unittest.main()
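A possible GPU-guarded variant of this test, sketched under the assumption that skipping cleanly on CPU-only builds is preferred over relying on the wrapper's internal guard (TestEmptyCacheGPU is a hypothetical name, not part of this change):

import unittest

import paddle
from paddle.fluid import core


class TestEmptyCacheGPU(unittest.TestCase):
    def test_empty_cache_on_gpu(self):
        # Skip on CPU-only builds; empty_cache() would be a no-op there anyway.
        if not core.is_compiled_with_cuda():
            self.skipTest("requires a CUDA build of Paddle")
        paddle.set_device("gpu")
        x = paddle.randn((2, 10, 12)).astype("float32")
        del x
        self.assertIsNone(paddle.device.cuda.empty_cache())


if __name__ == "__main__":
    unittest.main()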