diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt
index 9cc7c267454a4dbd4e1f62ec971e4160d6088913..8a1a1115ad7bd3d917ac041504c3c20c6920ba9a 100644
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -4,6 +4,7 @@ cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
 cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
 cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
 cc_library(naive_best_fit_allocator SRCS naive_best_fit_allocator.cc DEPS allocator buddy_allocator profiler)
+cc_test(naive_best_fit_allocator_test SRCS naive_best_fit_allocator_test.cc DEPS naive_best_fit_allocator)  # Allocate/Release smoke tests (CPU; CUDA and CUDA-pinned when built with CUDA)
 cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS locked_allocator buffered_allocator cpu_allocator best_fit_allocator)
 
 if (WITH_MKLDNN)
diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h
index e54748a53679d1363246896a0982275c4ef09535..b83d3efb72b719662a49be5a3b9aaf27e7386ed0 100644
--- a/paddle/fluid/memory/allocation/allocator.h
+++ b/paddle/fluid/memory/allocation/allocator.h
@@ -178,12 +178,18 @@ class Allocator {
     FreeImpl(allocation);
   }
 
+  // Release the unused memory this allocator holds for `place`.
+  // Forwards to ReleaseImpl(), whose default implementation does nothing.
+  void Release(const platform::Place& place) { ReleaseImpl(place); }
+
   // True if the `Allocate` is thread safe.
   virtual bool IsAllocThreadSafe() const;
 
  protected:
   virtual Allocation* AllocateImpl(size_t size) = 0;
   virtual void FreeImpl(Allocation* allocation);
+  // Hook behind Release(); the default implementation does nothing.
+  virtual void ReleaseImpl(const platform::Place& place) {}
 };
 
 using AllocationDeleter = Allocator::AllocationDeleter;
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 3213684c140b02e1fa4b846cb0448f9bc9d8f3ee..59b06d082872c11b56855bee75e9d14ac686d1e1 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -287,6 +287,11 @@ AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
   return m_->GetAllocator(place, size)->Allocate(size);
 }
 
+void AllocatorFacade::Release(const platform::Place& place) {
+  // The size only has to be non-zero so that GetAllocator picks allocator_.
+  m_->GetAllocator(place, /*size=*/1)->Release(place);
+}
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h
index 64b6fe25c352e82d6320e26d95efb61f3cb4a5b1..2f2f222f6c74a5c957461258f43fb1abf65e29b1 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.h
+++ b/paddle/fluid/memory/allocation/allocator_facade.h
@@ -44,6 +44,9 @@ class AllocatorFacade {
   // Allocate a unique allocation.
   AllocationPtr Alloc(const platform::Place& place, size_t size);
 
+  // Release unused pooled memory through the allocator selected for `place`.
+  void Release(const platform::Place& place);
+
   // TODO(yy): Allocate a Copy-On-Write allocation?
  private:
   AllocatorFacade();
diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
index cbc126264ac2c09ef2532bf21834a648c02473ec..b55ebf18934f2ba4d7b67f76f1c55ba9b426780e 100644
--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h
@@ -39,6 +39,9 @@ class AutoGrowthBestFitAllocator : public Allocator {
 
   void FreeImpl(Allocation *allocation) override;
 
+  // Frees idle chunks. NOTE(review): confirm FreeIdleChunks() locks itself.
+  void ReleaseImpl(const platform::Place &place) override { FreeIdleChunks(); }
+
  private:
   void FreeIdleChunks();
 
diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
index 685248a88f71d695095bd844dea06558e5cbcee6..dbe2f0ac94453ec7de0361dcf4eeb4817a947525 100644
--- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc
@@ -65,6 +65,7 @@ static void TestFreeIdleChunk(bool free_idle_chunk,
     } else {
       ASSERT_EQ(recorded_allocator->AllocatedSize(), memory_size + alignment);
     }
+    ag_allocator->Release(platform::CPUPlace());
   }
 }
 
diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
index c661c9f9c37509f6b55f6ce8b67b11752c68418a..842ebd16cf8afedc150caa65b2fdedf5c130bb1b 100644
--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
@@ -53,6 +53,9 @@ void *Alloc(const Place &place, size_t size);
 template <typename Place>
 void Free(const Place &place, void *p, size_t size);
 
+template <typename Place>
+void Release(const Place &place);
+
 template <typename Place>
 size_t Used(const Place &place);
 
@@ -99,6 +102,11 @@ void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p,
   GetCPUBuddyAllocator()->Free(p);
 }
 
+template <>  // CPU: release unused blocks of the CPU buddy allocator
+void Release<platform::CPUPlace>(const platform::CPUPlace &place) {
+  GetCPUBuddyAllocator()->Release();  // `place` is not consulted
+}
+
 template <>
 size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
   return GetCPUBuddyAllocator()->Used();
@@ -186,6 +194,17 @@ void Free<platform::XPUPlace>(const platform::XPUPlace &place, void *p,
 #endif
 }
 
+template <>  // XPU: every build flavour throws PermissionDenied
+void Release<platform::XPUPlace>(const platform::XPUPlace &place) {
+#ifdef PADDLE_WITH_XPU  // XPU build: releasing the pool is refused
+  PADDLE_THROW(
+      platform::errors::PermissionDenied("Release XPU pool is not supported."));
+#else  // non-XPU build: the place itself is rejected
+  PADDLE_THROW(
+      platform::errors::PermissionDenied("'XPUPlace' is not supported."));
+#endif
+}
+
 template <>
 size_t Used<platform::XPUPlace>(const platform::XPUPlace &place) {
 #ifdef PADDLE_WITH_XPU
@@ -313,6 +332,16 @@ void Free<platform::CUDAPlace>(const platform::CUDAPlace &place, void *p,
 #endif
 }
 
+template <>  // CUDA: release unused blocks of the pool for place.device
+void Release<platform::CUDAPlace>(const platform::CUDAPlace &place) {
+#ifdef PADDLE_WITH_CUDA
+  GetGPUBuddyAllocator(place.device)->Release();  // pool picked by device id
+#else  // build without CUDA: throws PermissionDenied
+  PADDLE_THROW(platform::errors::PermissionDenied(
+      "'CUDAPlace' is not supported in CPU only device."));
+#endif
+}
+
 #ifdef PADDLE_WITH_CUDA
 BuddyAllocator *GetCUDAPinnedBuddyAllocator() {
   static std::once_flag init_flag;
@@ -371,6 +400,17 @@ void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
 #endif
 }
 
+template <>  // CUDA pinned: release unused blocks of the pinned pool
+void Release<platform::CUDAPinnedPlace>(
+    const platform::CUDAPinnedPlace &place) {
+#ifdef PADDLE_WITH_CUDA
+  GetCUDAPinnedBuddyAllocator()->Release();  // `place` is not consulted
+#else  // build without CUDA: throws PermissionDenied
+  PADDLE_THROW(platform::errors::PermissionDenied(
+      "'CUDAPinnedPlace' is not supported in CPU only device."));
+#endif
+}
+
 struct AllocVisitor : public boost::static_visitor<void *> {
   inline explicit AllocVisitor(size_t size) : size_(size) {}
 
@@ -397,6 +437,13 @@ struct FreeVisitor : public boost::static_visitor<void> {
   size_t size_;
 };
 
+struct ReleaseVisitor : public boost::static_visitor<void> {
+  template <typename Place>  // called with the concrete place type
+  inline void operator()(const Place &place) const {
+    Release<Place>(place);  // forwards to the Release<Place> specialization
+  }
+};
+
 size_t Usage::operator()(const platform::CPUPlace &cpu) const {
   return Used(cpu);
 }
@@ -439,6 +486,10 @@ void NaiveBestFitAllocator::FreeImpl(Allocation *allocation) {
   delete allocation;
 }
 
+void NaiveBestFitAllocator::ReleaseImpl(const platform::Place &place) {
+  boost::apply_visitor(legacy::ReleaseVisitor(), place);  // per-place dispatch
+}
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h
index 4cf1bd6123e5fb3b99c60cc0a2750ef6295ab870..ba4c4ca226b1e08428df332a6b9f2f6774a07692 100644
--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h
@@ -35,6 +35,7 @@ class NaiveBestFitAllocator : public Allocator {
  protected:
   Allocation *AllocateImpl(size_t size) override;
   void FreeImpl(Allocation *allocation) override;
+  void ReleaseImpl(const platform::Place &place) override;
 
  private:
   platform::Place place_;
diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..054c75b11f78c7733c15ac39a44cdc45078af7e7
--- /dev/null
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator_test.cc
@@ -0,0 +1,74 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/memory/allocation/naive_best_fit_allocator.h"
+
+#include <algorithm>
+#include <chrono>              // NOLINT
+#include <condition_variable>  // NOLINT
+#include <mutex>               // NOLINT
+#include <string>
+#include <thread>  // NOLINT
+#include <vector>
+
+#include "gtest/gtest.h"
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+TEST(NaiveBestFitAllocatorTest, CpuAlloc) {
+  const auto place = platform::CPUPlace();
+  const size_t size = 1 << 20;
+  NaiveBestFitAllocator alloc{place};
+  {  // Release once the only allocation has been freed.
+    auto scoped = alloc.Allocate(size);
+  }
+  alloc.Release(place);
+
+  auto live = alloc.Allocate(size);
+  alloc.Release(place);  // Release while an allocation is still alive.
+}
+
+#ifdef PADDLE_WITH_CUDA
+TEST(NaiveBestFitAllocatorTest, GpuAlloc) {
+  const auto place = platform::CUDAPlace(0);
+  const size_t size = 1 << 20;
+  NaiveBestFitAllocator alloc{place};
+  {  // Release once the only allocation has been freed.
+    auto scoped = alloc.Allocate(size);
+  }
+  alloc.Release(place);
+
+  auto live = alloc.Allocate(size);
+  alloc.Release(place);  // Release while an allocation is still alive.
+}
+
+TEST(NaiveBestFitAllocatorTest, CudaPinnedAlloc) {
+  const auto place = platform::CUDAPinnedPlace();
+  const size_t size = 1 << 20;
+  NaiveBestFitAllocator alloc{place};
+  {  // Release once the only allocation has been freed.
+    auto scoped = alloc.Allocate(size);
+  }
+  alloc.Release(place);
+
+  auto live = alloc.Allocate(size);
+  alloc.Release(place);  // Release while an allocation is still alive.
+}
+#endif
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/retry_allocator.h b/paddle/fluid/memory/allocation/retry_allocator.h
index 4a787ff2d7b3848207449a1b7c04da0bd9884ea6..74828a0ede3f4318e8fe336ad0f189e3d58725f2 100644
--- a/paddle/fluid/memory/allocation/retry_allocator.h
+++ b/paddle/fluid/memory/allocation/retry_allocator.h
@@ -47,6 +47,9 @@ class RetryAllocator : public Allocator {
  protected:
   void FreeImpl(Allocation* allocation) override;
   Allocation* AllocateImpl(size_t size) override;
+  void ReleaseImpl(const platform::Place& place) override {
+    underlying_allocator_->Release(place);  // delegate to wrapped allocator
+  }
 
  private:
   std::shared_ptr<Allocator> underlying_allocator_;
diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc
index b80e48460bf9f537667652d4937fca7bead1fe51..13b77c660ca8f54a9e1b7befcef40e9a76c0833f 100644
--- a/paddle/fluid/memory/allocation/retry_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc
@@ -96,6 +96,7 @@ TEST(RetryAllocator, RetryAllocator) {
     bool is_all_equal = std::all_of(addresses.begin(), addresses.end(),
                                     [val](void *p) { return p == val; });
     ASSERT_TRUE(is_all_equal);
+    allocator->Release(platform::CPUPlace());
   }
 }
 
@@ -135,6 +136,7 @@ TEST(RetryAllocator, RetryAllocatorLastAllocFailure) {
       auto allocation = allocator.Allocate(allocate_size);
       ASSERT_TRUE(false);
       allocation.reset();
+      allocator.Release(p);
     } catch (BadAlloc &ex) {
       ASSERT_TRUE(std::string(ex.what()).find("Cannot allocate") !=
                   std::string::npos);
diff --git a/paddle/fluid/memory/allocation/thread_local_allocator.cc b/paddle/fluid/memory/allocation/thread_local_allocator.cc
index 50fe9c9b7524945117abd8441f1f53f6e9ce1328..d2a8250d3db58cce463fac15587a1bef99b274d6 100644
--- a/paddle/fluid/memory/allocation/thread_local_allocator.cc
+++ b/paddle/fluid/memory/allocation/thread_local_allocator.cc
@@ -72,6 +72,10 @@ void ThreadLocalAllocatorImpl::FreeImpl(ThreadLocalAllocation* allocation) {
   delete allocation;
 }
 
+void ThreadLocalAllocatorImpl::ReleaseImpl() {
+  buddy_allocator_->Release();  // forwards to the owned BuddyAllocator
+}
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/allocation/thread_local_allocator.h b/paddle/fluid/memory/allocation/thread_local_allocator.h
index 10ca4b828a4bb508ed91d15f2649c3d0d5e1da9a..764509e75ba23a76a6d7c186f4a3daaa75302c8f 100644
--- a/paddle/fluid/memory/allocation/thread_local_allocator.h
+++ b/paddle/fluid/memory/allocation/thread_local_allocator.h
@@ -52,6 +52,7 @@ class ThreadLocalAllocatorImpl
   explicit ThreadLocalAllocatorImpl(const platform::Place& p);
   ThreadLocalAllocation* AllocateImpl(size_t size);
   void FreeImpl(ThreadLocalAllocation* allocation);
+  void ReleaseImpl();
 
  private:
   std::unique_ptr<memory::detail::BuddyAllocator> buddy_allocator_;
@@ -91,6 +92,9 @@ class ThreadLocalCUDAAllocator : public Allocator {
     auto allocator_impl = tl_allocation->GetAllocator();
     allocator_impl->FreeImpl(tl_allocation);
   }
+  void ReleaseImpl(const platform::Place& p) override {  // `p` is unused
+    ThreadLocalCUDAAllocatorPool::Instance().Get(gpu_id_)->ReleaseImpl();
+  }
 
  private:
   int gpu_id_;
diff --git a/paddle/fluid/memory/allocation/thread_local_allocator_test.cc b/paddle/fluid/memory/allocation/thread_local_allocator_test.cc
index f9e2ea8c27a74c29e7b9bbea3ab30eadbfe48b3d..70fd3a48d7861ef6eb7ad8b8881fd5d22d5ab15b 100644
--- a/paddle/fluid/memory/allocation/thread_local_allocator_test.cc
+++ b/paddle/fluid/memory/allocation/thread_local_allocator_test.cc
@@ -62,6 +62,7 @@ TEST(ThreadLocalAllocator, cross_scope_release) {
         auto tl_allocator_impl =
             ThreadLocalCUDAAllocatorPool::Instance().Get(devices[j]);
         allocator_addresses[j][i] = tl_allocator_impl.get();
+        memory::Release(platform::CUDAPlace(devices[j]));
       }
     });
   }
diff --git a/paddle/fluid/memory/detail/buddy_allocator.cc b/paddle/fluid/memory/detail/buddy_allocator.cc
index 6ac99744d79380803925f973c5b39262685e1ff0..e7738d07147510f5f1895559ce7b11dd8b3fd69c 100644
--- a/paddle/fluid/memory/detail/buddy_allocator.cc
+++ b/paddle/fluid/memory/detail/buddy_allocator.cc
@@ -39,9 +39,10 @@ BuddyAllocator::~BuddyAllocator() {
   while (!pool_.empty()) {
     auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
     auto desc = cache_.LoadDesc(block);
-    VLOG(10) << "Free from block (" << block << ", " << desc->get_size() << ")";
+    VLOG(10) << "Free from block (" << block << ", " << desc->get_total_size()
+             << ")";
 
-    system_allocator_->Free(block, desc->get_size(), desc->get_index());
+    system_allocator_->Free(block, desc->get_total_size(), desc->get_index());
     cache_.Invalidate(block);
     pool_.erase(pool_.begin());
   }
@@ -161,6 +162,39 @@ void BuddyAllocator::Free(void* p) {
       IndexSizeAddress(desc->get_index(), desc->get_total_size(), block));
 }
 
+// Returns idle whole chunks to the system allocator. A pool block counts as a
+// whole chunk when its size and address match a record in chunks_; blocks
+// that are only part of a chunk stay in the pool.
+void BuddyAllocator::Release() {
+  std::lock_guard<std::mutex> lock(mutex_);
+  int num = 0;
+  uint64_t bytes = 0;
+  for (auto iter = pool_.begin(); iter != pool_.end();) {
+    const auto remain_size = std::get<1>(*iter);
+    const auto remain_ptr = std::get<2>(*iter);
+    // The lookup result is per block, so one match never makes the loop drop
+    // later, non-matching blocks from the pool without freeing them.
+    auto chunk = chunks_.begin();
+    while (chunk != chunks_.end() && (std::get<1>(*chunk) != remain_size ||
+                                      std::get<2>(*chunk) != remain_ptr)) {
+      ++chunk;
+    }
+    if (chunk == chunks_.end()) {
+      ++iter;
+      continue;
+    }
+    ++num;
+    bytes += remain_size;
+    total_free_ -= remain_size;
+    system_allocator_->Free(remain_ptr, remain_size, std::get<0>(*chunk));
+    cache_.Invalidate(static_cast<MemoryBlock*>(remain_ptr));
+    // Forget the record so it cannot match an unrelated block later on.
+    chunks_.erase(chunk);
+    iter = pool_.erase(iter);
+  }
+  VLOG(10) << "Release " << num << " chunk, Free " << bytes << " bytes.";
+}
+
 size_t BuddyAllocator::Used() { return total_used_; }
 size_t BuddyAllocator::GetMinChunkSize() { return min_chunk_size_; }
 size_t BuddyAllocator::GetMaxChunkSize() { return max_chunk_size_; }
@@ -213,6 +247,9 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool(
 
   total_free_ += allocate_bytes;
 
+  // record the chunk.
+  chunks_.insert(IndexSizeAddress(index, allocate_bytes, p));
+
   // dump the block into pool
   return pool_.insert(IndexSizeAddress(index, allocate_bytes, p)).first;
 }
diff --git a/paddle/fluid/memory/detail/buddy_allocator.h b/paddle/fluid/memory/detail/buddy_allocator.h
index 791f8b56277723c59ea47e60c0d8d9eec9745fc4..0bfc8918503b9e210f00774c665e54a104779fcf 100644
--- a/paddle/fluid/memory/detail/buddy_allocator.h
+++ b/paddle/fluid/memory/detail/buddy_allocator.h
@@ -40,6 +40,8 @@ class BuddyAllocator {
  public:
   void* Alloc(size_t unaligned_size);
   void Free(void* ptr);
+  // Give idle whole chunks back to the system allocator (a real free).
+  void Release();
   size_t Used();
   size_t GetMinChunkSize();
   size_t GetMaxChunkSize();
@@ -92,6 +94,11 @@ class BuddyAllocator {
    */
   PoolSet pool_;
 
+  /**
+   * \brief Chunks recorded by RefillPool(); Release() checks pool_ against it.
+   */
+  PoolSet chunks_;
+
  private:
   /*! Unify the metadata format between GPU and CPU allocations */
   MetadataCache cache_;
diff --git a/paddle/fluid/memory/detail/buddy_allocator_test.cc b/paddle/fluid/memory/detail/buddy_allocator_test.cc
index 1722acd10aa38e33f3c11aa8eac7cb50dce9fed4..90f7e33eb3540f6272df80296bba57c3d7d9b596 100644
--- a/paddle/fluid/memory/detail/buddy_allocator_test.cc
+++ b/paddle/fluid/memory/detail/buddy_allocator_test.cc
@@ -305,6 +305,23 @@ TEST(BuddyAllocator, SpeedAna) {
   std::cerr << "time cost " << diff.count() << std::endl;
 }
 
+TEST(BuddyAllocator, Release) {
+  // On an 8 GB GPU, a fraction of 0.1 gives a pool of about 800 MB.
+  FLAGS_fraction_of_gpu_memory_to_use = 0.1;
+  FLAGS_initial_gpu_memory_in_mb = 0;
+  FLAGS_reallocate_gpu_memory_in_mb = 0;
+
+  BuddyAllocator buddy_allocator(
+      std::unique_ptr<SystemAllocator>(new GPUAllocator(TEST_GPU_ID)),
+      platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
+
+  // Each request below is smaller than the pool.
+  TestBuddyAllocator(&buddy_allocator, 10);
+  TestBuddyAllocator(&buddy_allocator, 10 << 10);
+  TestBuddyAllocator(&buddy_allocator, 50 << 20);
+
+  buddy_allocator.Release();  // smoke check: nothing is asserted afterwards
+}
 #endif
 
 }  // namespace detail
diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc
index e01f030585a8330a2e9bcc2bc2a662f00f5cde1c..2fbde03b42bcc025312cc5980afa35b7e320236f 100644
--- a/paddle/fluid/memory/malloc.cc
+++ b/paddle/fluid/memory/malloc.cc
@@ -31,5 +31,9 @@ AllocationPtr Alloc(const platform::Place &place, size_t size) {
   return allocation::AllocatorFacade::Instance().Alloc(place, size);
 }
 
+void Release(const platform::Place &place) {  // release unused pool memory
+  allocation::AllocatorFacade::Instance().Release(place);
+}
+
 }  // namespace memory
 }  // namespace paddle
diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h
index 73487795f752eab69e660154c2e35817b2c80368..3d6836e1d255b4de99672bec81e1ed226c3a9d14 100644
--- a/paddle/fluid/memory/malloc.h
+++ b/paddle/fluid/memory/malloc.h
@@ -38,5 +38,7 @@ extern AllocationPtr Alloc(const platform::Place& place, size_t size);
 
 extern AllocationPtr Alloc(const platform::DeviceContext& dev_ctx, size_t size);
 
+extern void Release(const platform::Place& place);
+
 }  // namespace memory
 }  // namespace paddle