diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 0a5a3b31542b5fd5dd71f7531e34233e6f735aba..20a922b406745877df15eb79d3052381937b6a15 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -4,7 +4,6 @@ set(ALLOCATOR_DEPS place stats profiler phi_backends device_context) set(ALLOCATOR_SRCS allocator.cc cpu_allocator.cc - locked_allocator.cc aligned_allocator.cc buffered_allocator.cc best_fit_allocator.cc diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index db450ea6944f78690ccf689b6b124a1bfc5cc593..e8b9980b4dbafd4ff935a9856b1f973f918d79ec 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include <cmath> +#include <mutex> #include "paddle/fluid/platform/enforce.h" @@ -120,6 +121,7 @@ size_t BestFitAllocator::NumFreeChunks() const { return num; } void BestFitAllocator::FreeImpl(phi::Allocation* allocation) { + std::lock_guard<SpinLock> guard(spinlock_); auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation); PADDLE_ENFORCE_NOT_NULL( bf_allocation, @@ -156,6 +158,7 @@ void BestFitAllocator::FreeImpl(phi::Allocation* allocation) { delete allocation; } phi::Allocation* BestFitAllocator::AllocateImpl(size_t size) { + std::lock_guard<SpinLock> guard(spinlock_); auto highest_set_bit = static_cast<size_t>(HighestBitPos(size)); MapIt map_it; for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) { diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index 72242ea14156ab61f400b17deb852fd797caa8c3..7e3a18955ac67e441c3a31e54dbf2fcd37d124aa 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -20,6 +20,7 @@ #include <map> #include 
"paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/memory/allocation/spin_lock.h" #include "paddle/fluid/platform//place.h" namespace paddle { @@ -112,6 +113,8 @@ class BestFitAllocator : public Allocator { size_t NumFreeChunks() const; + bool IsAllocThreadSafe() const override { return true; } + private: size_t FreeSize() const; using MapIt = typename details::FreeChunkBin::value_type::iterator; @@ -131,6 +134,7 @@ phi::Allocation* allocation_; // not owned details::ChunkList chunks_; details::FreeChunkBin free_chunks_; + SpinLock spinlock_; }; } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/best_fit_allocator_test.cc index bbaa6b98f4cd2c09b2f472a5a73beaa4bd41c006..440fc85b57867b5836d4872d76cb28f1c3b8164d 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cc @@ -22,7 +22,6 @@ #include "gtest/gtest.h" #include "gtest/gtest_pred_impl.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" namespace paddle { namespace memory { @@ -100,10 +99,7 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) { CPUAllocator allocator; auto global_allocation = allocator.Allocate(256UL * 1024 * 1024); - std::unique_ptr<Allocator> best_fit_allocator( - new BestFitAllocator(global_allocation.get())); - - LockedAllocator locked_allocator(std::move(best_fit_allocator)); + BestFitAllocator best_fit_allocator(global_allocation.get()); auto th_main = [&](std::random_device::result_type seed) { std::default_random_engine engine(seed); @@ -113,7 +109,7 @@ size_t allocate_size = dist(engine); auto allocation = - locked_allocator.Allocate(sizeof(size_t) * allocate_size); + best_fit_allocator.Allocate(sizeof(size_t) * allocate_size); 
size_t* data = reinterpret_cast<size_t*>(allocation->ptr()); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu index 44bcc10abae1a8f788b22445b025dffd3a03d9d6..b4081e08c9e34c8869d335e7d864b918b0701e58 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu +++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu @@ -21,7 +21,6 @@ #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cuda_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/for_range.h" namespace paddle { @@ -40,8 +39,7 @@ TEST(BestFitAllocator, concurrent_cuda) { CUDAAllocator allocator(platform::CUDAPlace(0)); // 256 MB auto cuda_allocation = allocator.Allocate(256U * 1024 * 1024); - LockedAllocator concurrent_allocator( - std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get()))); + BestFitAllocator concurrent_allocator(cuda_allocation.get()); platform::CUDAPlace gpu(0); phi::GPUContext dev_ctx(gpu); diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc index c720cff3e9357931cfecd0d7e2cb42516e276942..6d95da9eabe105861e6f50dce350f5e03c69a7c0 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc @@ -20,37 +20,11 @@ #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" namespace paddle { namespace memory { namespace allocation { -inline std::unique_ptr<BufferedAllocator> GetBufferedAllocator( - phi::Allocation *allocation, bool thread_safe) { - std::unique_ptr<Allocator> allocator(new BestFitAllocator(allocation)); - if (thread_safe) { - 
allocator.reset(new LockedAllocator(std::move(allocator))); - } - - return std::unique_ptr<BufferedAllocator>( - new BufferedAllocator(std::move(allocator))); -} - -TEST(buffered_allocator, thread_safety) { - std::unique_ptr<CPUAllocator> allocator(new CPUAllocator()); - auto chunk = allocator->Allocate(1 << 20); - { - auto buf_allocator = GetBufferedAllocator(chunk.get(), true); - ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true); - } - - { - auto buf_allocator = GetBufferedAllocator(chunk.get(), false); - ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false); - } -} - class StubAllocation : public Allocation { public: using Allocation::Allocation; @@ -136,12 +110,15 @@ TEST(buffered_allocator, garbage_collection) { std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator()); auto chunk = cpu_allocator->Allocate(2048); - auto allocator = GetBufferedAllocator(chunk.get(), false); - auto x1 = allocator->Allocate(1600); - auto x2 = allocator->Allocate(400); + std::unique_ptr<Allocator> allocator(new BestFitAllocator(chunk.get())); + + auto buffered_allocator = std::unique_ptr<BufferedAllocator>( + new BufferedAllocator(std::move(allocator))); + auto x1 = buffered_allocator->Allocate(1600); + auto x2 = buffered_allocator->Allocate(400); x1 = nullptr; x2 = nullptr; - auto x3 = allocator->Allocate(1600); + auto x3 = buffered_allocator->Allocate(1600); ASSERT_NE(x3, nullptr); ASSERT_NE(x3->ptr(), nullptr); } diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc deleted file mode 100644 index aec58229d018378ac1e11cd29bcab62cfed9f18f..0000000000000000000000000000000000000000 --- a/paddle/fluid/memory/allocation/locked_allocator.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/memory/allocation/locked_allocator.h" - -#include <mutex>  // NOLINT - -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/lock_guard_ptr.h" - -namespace paddle { -namespace memory { -namespace allocation { - -bool LockedAllocator::IsAllocThreadSafe() const { return true; } - -LockedAllocator::LockedAllocator( - std::shared_ptr<Allocator> underlying_allocator) - : underlying_allocator_(std::move(underlying_allocator)) { - PADDLE_ENFORCE_NOT_NULL( - underlying_allocator_, - platform::errors::InvalidArgument( - "Underlying allocator of LockedAllocator is NULL")); - if (!underlying_allocator_->IsAllocThreadSafe()) { - mtx_.reset(new std::mutex()); - } -} - -void LockedAllocator::FreeImpl(phi::Allocation *allocation) { - platform::LockGuardPtr<std::mutex> guard(mtx_); - underlying_allocator_->Free(allocation); -} - -phi::Allocation *LockedAllocator::AllocateImpl(size_t size) { - platform::LockGuardPtr<std::mutex> guard(mtx_); - return underlying_allocator_->Allocate(size).release(); -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h deleted file mode 100644 index ff504e7c8bea72d31f1e74ec8c359e0c149ef8bd..0000000000000000000000000000000000000000 --- a/paddle/fluid/memory/allocation/locked_allocator.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once -#include <memory> -#include <mutex>  // NOLINT -#include <thread>  // NOLINT - -#include "paddle/fluid/memory/allocation/allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -// A allocator to make underlying allocator thread safe. -class LockedAllocator : public Allocator { - public: - explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator); - bool IsAllocThreadSafe() const override; - - protected: - void FreeImpl(phi::Allocation *allocation) override; - phi::Allocation *AllocateImpl(size_t size) override; - - private: - std::shared_ptr<Allocator> underlying_allocator_; - std::unique_ptr<std::mutex> mtx_; -}; - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc index 7b6f99ac3522dd4b3cc3586c1264e828841de57f..d1872ee00b7b71b980ae5473b6d49bd868bc8e43 100644 --- a/paddle/fluid/memory/allocation/retry_allocator_test.cc +++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc @@ -19,7 +19,6 @@ #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/memory/allocation/cuda_allocator.h" #endif @@ -34,11 +33,6 @@ TEST(RetryAllocator, 
RetryAllocator) { size_t size = (1 << 20); auto cpu_allocation = cpu_allocator.Allocate(size); - std::unique_ptr<BestFitAllocator> best_fit_allocator( - new BestFitAllocator(cpu_allocation.get())); - std::unique_ptr<LockedAllocator> locked_allocator( - new LockedAllocator(std::move(best_fit_allocator))); - size_t thread_num = 4; size_t sleep_time = 40; size_t extra_time = 20; @@ -48,10 +42,8 @@ TEST(RetryAllocator, RetryAllocator) { { std::unique_ptr<BestFitAllocator> best_fit_allocator( new BestFitAllocator(cpu_allocation.get())); - std::unique_ptr<LockedAllocator> locked_allocator( - new LockedAllocator(std::move(best_fit_allocator))); allocators.push_back(std::make_shared<RetryAllocator>( - std::move(locked_allocator), + std::move(best_fit_allocator), (thread_num - 1) * (sleep_time + extra_time))); }