diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc
index fda233b3a016b5df6d9945ebf8cf7a749e7801a2..76edb3910cccedf761721e7ca34e49fc1054e59b 100644
--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -22,10 +22,12 @@ limitations under the License. */
 #else
 #include "paddle/fluid/platform/dynload/cudnn.h"
 #endif
+#include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/lock_guard_ptr.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/fluid/platform/monitor.h"
+#include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/split.h"
 
 DECLARE_double(fraction_of_gpu_memory_to_use);
@@ -630,5 +632,12 @@ bool IsCudaMallocRecorded(int dev_id) {
   return RecordedCudaMallocHelper::Instance(dev_id)->NeedRecord();
 }
 
+void EmptyCache(void) {
+  std::vector<int> devices = GetSelectedDevices();
+  for (auto device : devices) {
+    memory::Release(CUDAPlace(device));
+  }
+}
+
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/fluid/platform/gpu_info.h b/paddle/fluid/platform/gpu_info.h
index b5800ef0838859bc4a49f99f82e4163599b0ecd9..ef7f93a61dbfb36ee4db5ee2f48879815414ecbc 100644
--- a/paddle/fluid/platform/gpu_info.h
+++ b/paddle/fluid/platform/gpu_info.h
@@ -137,6 +137,9 @@ uint64_t RecordedCudaMallocSize(int dev_id);
 
 bool IsCudaMallocRecorded(int dev_id);
 
+//! Empty idle cached memory held by the allocator.
+void EmptyCache(void);
+
 }  // namespace platform
 }  // namespace paddle
 
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index b0148e50afc5483ffd706858345bb4b3e4a964f8..f797ed5142c3dcfde11c79f340d023b16f1ec141 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -2254,6 +2254,7 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("op_support_gpu", OpSupportGPU);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   m.def("get_cuda_device_count", platform::GetCUDADeviceCount);
+  m.def("cuda_empty_cache", platform::EmptyCache);
 
 #if !defined(PADDLE_WITH_HIP) && !defined(_WIN32)
   m.def("nvprof_init", platform::CudaProfilerInit);
diff --git a/python/paddle/device/cuda/__init__.py b/python/paddle/device/cuda/__init__.py
index 834cda71fdc5f14e1d7defa70641dafaf35a4ae4..be2e2488a304917e91dae47dc76c91e56ab2e010 100644
--- a/python/paddle/device/cuda/__init__.py
+++ b/python/paddle/device/cuda/__init__.py
@@ -23,6 +23,7 @@ __all__ = [
     'current_stream',
     'synchronize',
     'device_count',
+    'empty_cache',
 ]
 
 
@@ -117,3 +118,26 @@ def device_count():
         core, 'get_cuda_device_count') else 0
 
     return num_gpus
+
+
+def empty_cache():
+    """
+    Releases idle cached memory held by the allocator so that it can be used by other GPU
+    applications and becomes visible in `nvidia-smi`. In most cases you don't need to use
+    this function, because Paddle does not release memory back to the OS when you delete
+    Tensors on the GPU; it keeps GPU memory in a pool so that later allocations are faster.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            # required: gpu
+            paddle.set_device("gpu")
+            tensor = paddle.randn([512, 512, 512], "float32")
+            del tensor
+            paddle.device.cuda.empty_cache()
+    """
+
+    if core.is_compiled_with_cuda():
+        core.cuda_empty_cache()
diff --git a/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py
new file mode 100644
index 0000000000000000000000000000000000000000..4aefb234bbfc144304c1687b4dd21387bb91a2a9
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_cuda_empty_cache.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import unittest
+
+
+class TestEmptyCache(unittest.TestCase):
+    def test_empty_cache(self):
+        x = paddle.randn((2, 10, 12)).astype('float32')
+        del x
+        self.assertIsNone(paddle.device.cuda.empty_cache())
+
+
+if __name__ == '__main__':
+    unittest.main()
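
A minimal usage sketch of the new API, for reference only and not part of the diff. It assumes a GPU build of Paddle and uses `paddle.device.cuda.empty_cache` as bound above, together with the existing `paddle.device.cuda.device_count` and `paddle.set_device`:

    import paddle

    # Skip on CPU-only builds; empty_cache() is also a no-op there,
    # since the binding is guarded by core.is_compiled_with_cuda().
    if paddle.device.cuda.device_count() > 0:
        paddle.set_device("gpu")
        x = paddle.randn([512, 512, 512], dtype="float32")
        del x  # the freed blocks stay in the allocator's pool
        # Return idle cached blocks to the device so the memory
        # shows up as free in `nvidia-smi`.
        paddle.device.cuda.empty_cache()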