diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 0a5a3b31542b5fd5dd71f7531e34233e6f735aba..20a922b406745877df15eb79d3052381937b6a15 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -4,7 +4,6 @@ set(ALLOCATOR_DEPS place stats profiler phi_backends device_context) set(ALLOCATOR_SRCS allocator.cc cpu_allocator.cc - locked_allocator.cc aligned_allocator.cc buffered_allocator.cc best_fit_allocator.cc diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index db450ea6944f78690ccf689b6b124a1bfc5cc593..e8b9980b4dbafd4ff935a9856b1f973f918d79ec 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include <cmath> +#include <mutex> #include "paddle/fluid/platform/enforce.h" @@ -120,6 +121,7 @@ size_t BestFitAllocator::NumFreeChunks() const { return num; } void BestFitAllocator::FreeImpl(phi::Allocation* allocation) { + std::lock_guard<SpinLock> guard(spinlock_); auto* bf_allocation = dynamic_cast<BestFitAllocation*>(allocation); PADDLE_ENFORCE_NOT_NULL( bf_allocation, @@ -156,6 +158,7 @@ void BestFitAllocator::FreeImpl(phi::Allocation* allocation) { delete allocation; } phi::Allocation* BestFitAllocator::AllocateImpl(size_t size) { + std::lock_guard<SpinLock> guard(spinlock_); auto highest_set_bit = static_cast<size_t>(HighestBitPos(size)); MapIt map_it; for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) { diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index 72242ea14156ab61f400b17deb852fd797caa8c3..7e3a18955ac67e441c3a31e54dbf2fcd37d124aa 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -20,6 +20,7 @@ #include <map> #include 
"paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/memory/allocation/spin_lock.h" #include "paddle/fluid/platform//place.h" namespace paddle { @@ -112,6 +113,8 @@ class BestFitAllocator : public Allocator { size_t NumFreeChunks() const; + bool IsAllocThreadSafe() const override { return true; } + private: size_t FreeSize() const; using MapIt = typename details::FreeChunkBin::value_type::iterator; @@ -131,6 +134,7 @@ phi::Allocation* allocation_; // not owned details::ChunkList chunks_; details::FreeChunkBin free_chunks_; + SpinLock spinlock_; }; } // namespace allocation } // namespace memory diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/best_fit_allocator_test.cc index bbaa6b98f4cd2c09b2f472a5a73beaa4bd41c006..440fc85b57867b5836d4872d76cb28f1c3b8164d 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cc @@ -22,7 +22,6 @@ #include "gtest/gtest.h" #include "gtest/gtest_pred_impl.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" namespace paddle { namespace memory { @@ -100,10 +99,7 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) { CPUAllocator allocator; auto global_allocation = allocator.Allocate(256UL * 1024 * 1024); - std::unique_ptr<Allocator> best_fit_allocator( - new BestFitAllocator(global_allocation.get())); - - LockedAllocator locked_allocator(std::move(best_fit_allocator)); + BestFitAllocator best_fit_allocator(global_allocation.get()); auto th_main = [&](std::random_device::result_type seed) { std::default_random_engine engine(seed); @@ -113,7 +109,7 @@ size_t allocate_size = dist(engine); auto allocation = - locked_allocator.Allocate(sizeof(size_t) * allocate_size); + best_fit_allocator.Allocate(sizeof(size_t) * allocate_size); 
size_t* data = reinterpret_cast<size_t*>(allocation->ptr()); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu index 44bcc10abae1a8f788b22445b025dffd3a03d9d6..b4081e08c9e34c8869d335e7d864b918b0701e58 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator_test.cu +++ b/paddle/fluid/memory/allocation/best_fit_allocator_test.cu @@ -21,7 +21,6 @@ #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cuda_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/for_range.h" namespace paddle { @@ -40,8 +39,7 @@ TEST(BestFitAllocator, concurrent_cuda) { CUDAAllocator allocator(platform::CUDAPlace(0)); // 256 MB auto cuda_allocation = allocator.Allocate(256U * 1024 * 1024); - LockedAllocator concurrent_allocator( - std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get()))); + BestFitAllocator concurrent_allocator(cuda_allocation.get()); platform::CUDAPlace gpu(0); phi::GPUContext dev_ctx(gpu); diff --git a/paddle/fluid/memory/allocation/buffered_allocator_test.cc b/paddle/fluid/memory/allocation/buffered_allocator_test.cc index c720cff3e9357931cfecd0d7e2cb42516e276942..6d95da9eabe105861e6f50dce350f5e03c69a7c0 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator_test.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator_test.cc @@ -20,37 +20,11 @@ #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" namespace paddle { namespace memory { namespace allocation { -inline std::unique_ptr<BufferedAllocator> GetBufferedAllocator( - phi::Allocation *allocation, bool thread_safe) { - std::unique_ptr<Allocator> allocator(new BestFitAllocator(allocation)); - if (thread_safe) { - 
allocator.reset(new LockedAllocator(std::move(allocator))); - } - - return std::unique_ptr<BufferedAllocator>( - new BufferedAllocator(std::move(allocator))); -} - -TEST(buffered_allocator, thread_safety) { - std::unique_ptr<CPUAllocator> allocator(new CPUAllocator()); - auto chunk = allocator->Allocate(1 << 20); - { - auto buf_allocator = GetBufferedAllocator(chunk.get(), true); - ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true); - } - - { - auto buf_allocator = GetBufferedAllocator(chunk.get(), false); - ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), false); - } -} - class StubAllocation : public Allocation { public: using Allocation::Allocation; @@ -136,12 +110,15 @@ TEST(buffered_allocator, garbage_collection) { std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator()); auto chunk = cpu_allocator->Allocate(2048); - auto allocator = GetBufferedAllocator(chunk.get(), false); - auto x1 = allocator->Allocate(1600); - auto x2 = allocator->Allocate(400); + std::unique_ptr<Allocator> allocator(new BestFitAllocator(chunk.get())); + + auto buffered_allocator = std::unique_ptr<BufferedAllocator>( + new BufferedAllocator(std::move(allocator))); + auto x1 = buffered_allocator->Allocate(1600); + auto x2 = buffered_allocator->Allocate(400); x1 = nullptr; x2 = nullptr; - auto x3 = allocator->Allocate(1600); + auto x3 = buffered_allocator->Allocate(1600); ASSERT_NE(x3, nullptr); ASSERT_NE(x3->ptr(), nullptr); } diff --git a/paddle/fluid/memory/allocation/locked_allocator.cc b/paddle/fluid/memory/allocation/locked_allocator.cc deleted file mode 100644 index aec58229d018378ac1e11cd29bcab62cfed9f18f..0000000000000000000000000000000000000000 --- a/paddle/fluid/memory/allocation/locked_allocator.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/memory/allocation/locked_allocator.h" - -#include <mutex>  // NOLINT - -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/lock_guard_ptr.h" - -namespace paddle { -namespace memory { -namespace allocation { - -bool LockedAllocator::IsAllocThreadSafe() const { return true; } - -LockedAllocator::LockedAllocator( - std::shared_ptr<Allocator> underlying_allocator) - : underlying_allocator_(std::move(underlying_allocator)) { - PADDLE_ENFORCE_NOT_NULL( - underlying_allocator_, - platform::errors::InvalidArgument( - "Underlying allocator of LockedAllocator is NULL")); - if (!underlying_allocator_->IsAllocThreadSafe()) { - mtx_.reset(new std::mutex()); - } -} - -void LockedAllocator::FreeImpl(phi::Allocation *allocation) { - platform::LockGuardPtr<std::mutex> guard(mtx_); - underlying_allocator_->Free(allocation); -} - -phi::Allocation *LockedAllocator::AllocateImpl(size_t size) { - platform::LockGuardPtr<std::mutex> guard(mtx_); - return underlying_allocator_->Allocate(size).release(); -} - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h deleted file mode 100644 index ff504e7c8bea72d31f1e74ec8c359e0c149ef8bd..0000000000000000000000000000000000000000 --- a/paddle/fluid/memory/allocation/locked_allocator.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once -#include <memory> -#include <mutex>  // NOLINT -#include <thread>  // NOLINT - -#include "paddle/fluid/memory/allocation/allocator.h" - -namespace paddle { -namespace memory { -namespace allocation { - -// A allocator to make underlying allocator thread safe. -class LockedAllocator : public Allocator { - public: - explicit LockedAllocator(std::shared_ptr<Allocator> underlying_allocator); - bool IsAllocThreadSafe() const override; - - protected: - void FreeImpl(phi::Allocation *allocation) override; - phi::Allocation *AllocateImpl(size_t size) override; - - private: - std::shared_ptr<Allocator> underlying_allocator_; - std::unique_ptr<std::mutex> mtx_; -}; - -} // namespace allocation -} // namespace memory -} // namespace paddle diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc index 7b6f99ac3522dd4b3cc3586c1264e828841de57f..d1872ee00b7b71b980ae5473b6d49bd868bc8e43 100644 --- a/paddle/fluid/memory/allocation/retry_allocator_test.cc +++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc @@ -19,7 +19,6 @@ #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" -#include "paddle/fluid/memory/allocation/locked_allocator.h" #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/memory/allocation/cuda_allocator.h" #endif @@ -34,11 +33,6 @@ TEST(RetryAllocator, 
RetryAllocator) { size_t size = (1 << 20); auto cpu_allocation = cpu_allocator.Allocate(size); - std::unique_ptr<BestFitAllocator> best_fit_allocator( - new BestFitAllocator(cpu_allocation.get())); - std::unique_ptr<LockedAllocator> locked_allocator( - new LockedAllocator(std::move(best_fit_allocator))); - size_t thread_num = 4; size_t sleep_time = 40; size_t extra_time = 20; @@ -48,10 +42,8 @@ TEST(RetryAllocator, RetryAllocator) { { std::unique_ptr<BestFitAllocator> best_fit_allocator( new BestFitAllocator(cpu_allocation.get())); - std::unique_ptr<LockedAllocator> locked_allocator( - new LockedAllocator(std::move(best_fit_allocator))); allocators.push_back(std::make_shared<RetryAllocator>( - std::move(locked_allocator), + std::move(best_fit_allocator), (thread_num - 1) * (sleep_time + extra_time))); }