diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt
index 44a354cf223f1147aa016fedca7f0c0b7c6bf1f2..84d22ac96ca1ec25cd974a051aac2e4d52b49b59 100644
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -33,7 +33,7 @@ else ()
 endif()
 
 cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
-
+cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
 cc_library(allocator_facade SRCS allocator_facade.cc DEPS
         ${AllocatorFacadeDeps}
         cpu_allocator
@@ -41,6 +41,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
         best_fit_allocator
         naive_managed_allocator
         aligned_allocator
+        auto_increment_allocator
         cuda_device_guard)
 
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 2a5fd608bcc95ab906544b2a782a4a45885539d7..260c787a740f63c14bf57d71b9eabc2bc8593e71 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -17,6 +17,7 @@
 #include <vector>
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
+#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
 #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
@@ -33,6 +34,7 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+// TODO(yy): Dirty code here. This class should be configurable in runtime.
 class CPUManagedAllocator : public ManagedAllocator {
  public:
   CPUManagedAllocator()
@@ -56,24 +58,59 @@ class CPUManagedAllocator : public ManagedAllocator {
       return normal_allocator_->AllocateShared(size, attr);
     }
   }
+  bool IsAllocThreadSafe() const override { return true; }
 
  private:
   std::shared_ptr<ManagedAllocator> normal_allocator_;
   std::shared_ptr<ManagedAllocator> communication_allocator_;
 };
 
-class AllocatorFacadePrivate {
+// TODO(yy): Dirty code here. This class should be configurable in runtime.
+class CUDAManagedAllocator : public ManagedAllocator {
  public:
-  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
-  std::vector<std::unique_ptr<Allocation>> pre_allocations_;
-  std::vector<std::shared_ptr<Allocator>> holding_allocators_;
+  explicit CUDAManagedAllocator(int dev_id) {
+    platform::CUDADeviceGuard guard(dev_id);
+    max_chunk_size_ = platform::GpuMaxChunkSize();
+    raw_allocator_ = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
+        new CUDAAllocator(platform::CUDAPlace(dev_id))));
+    default_allocator_ = std::make_shared<AutoIncrementAllocator>(
+        [this] { return std::move(BestFitAllocatorCreator()); });
+  }
 
-  ~AllocatorFacadePrivate() {
+  ~CUDAManagedAllocator() {
     // Specify destruct order.
-    pre_allocations_.clear();
-    allocators_.clear();
-    holding_allocators_.clear();
+    default_allocator_.reset();
+    chunks_.clear();
+    raw_allocator_.reset();
+  }
+
+  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
+    return default_allocator_->Allocate(size, attr);
+  }
+  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
+    return default_allocator_->AllocateShared(size, attr);
+  }
+
+  std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
+    chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
+    auto* allocation = chunks_.back().get();
+    return NaiveManagedAllocator::Create(
+        std::unique_ptr<Allocator>(new BestFitAllocator(allocation)));
   }
+  bool IsAllocThreadSafe() const override { return true; }
+
+ private:
+  size_t max_chunk_size_;
+  std::vector<std::unique_ptr<Allocation>> chunks_;
+  std::shared_ptr<ManagedAllocator> raw_allocator_;
+  std::shared_ptr<ManagedAllocator> default_allocator_;
+};
+
+class AllocatorFacadePrivate {
+ public:
+  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
+
+  ~AllocatorFacadePrivate() {}
 
   AllocatorFacadePrivate() {
     InitCPUAllocator();
@@ -88,19 +125,8 @@ class AllocatorFacadePrivate {
   void InitCUDAAllocator() {
 #ifdef PADDLE_WITH_CUDA
     for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) {
-      platform::CUDADeviceGuard guard(dev_id);
-      auto cuda_allocator =
-          NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
-              new CUDAAllocator(platform::CUDAPlace(dev_id))));
-      auto allocation = cuda_allocator->Allocate(platform::GpuMaxChunkSize());
-      auto allocator = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
-          new LockedAllocator(std::unique_ptr<Allocator>(
-              new BestFitAllocator(allocation.get())))));
-
-      pre_allocations_.emplace_back(std::move(allocation));
-      holding_allocators_.emplace_back(cuda_allocator);
       allocators_[platform::CUDAPlace(dev_id)] =
-          std::make_shared<AlignedAllocator<64>>(std::move(allocator));
+          std::make_shared<CUDAManagedAllocator>(dev_id);
     }
 #endif
   }
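
Note on the allocator_facade.cc change above: each GPU no longer eagerly pre-allocates a single best-fit chunk in InitCUDAAllocator. Instead, CUDAManagedAllocator hands AutoIncrementAllocator a creator callback (BestFitAllocatorCreator), so a fresh max_chunk_size_ chunk is carved out of the raw CUDA allocator only when every existing chunk fails a request. The explicit destruct order in ~CUDAManagedAllocator follows from the ownership chain: the best-fit allocators view memory owned by chunks_, which in turn was obtained from raw_allocator_. A minimal sketch of the creator wiring, using hypothetical toy types (RawAllocator, BestFitChunk, DeviceManagedAllocator) rather than the real Paddle classes:

// Minimal sketch of the creator wiring; RawAllocator, BestFitChunk and
// DeviceManagedAllocator are hypothetical stand-ins, not the Paddle classes.
#include <cstddef>
#include <functional>
#include <memory>
#include <vector>

struct Allocation {};  // stands in for one chunk of device memory

struct RawAllocator {  // stands in for the NaiveManaged CUDAAllocator
  std::unique_ptr<Allocation> Allocate(std::size_t /*bytes*/) {
    return std::unique_ptr<Allocation>(new Allocation);
  }
};

struct BestFitChunk {  // stands in for a BestFitAllocator over one chunk
  explicit BestFitChunk(Allocation* chunk) : chunk_(chunk) {}
  Allocation* chunk_;  // non-owning view; chunks_ below keeps it alive
};

class DeviceManagedAllocator {  // mirrors CUDAManagedAllocator's shape
 public:
  explicit DeviceManagedAllocator(std::size_t max_chunk_size)
      : max_chunk_size_(max_chunk_size) {}

  // In the real class this is BestFitAllocatorCreator: it runs only when
  // all existing chunks are exhausted, so device memory grows one
  // max_chunk_size_ chunk at a time instead of being fully pre-allocated.
  std::shared_ptr<BestFitChunk> NewBestFitChunk() {
    chunks_.emplace_back(raw_.Allocate(max_chunk_size_));
    return std::make_shared<BestFitChunk>(chunks_.back().get());
  }

  // The real constructor passes a lambda like this to AutoIncrementAllocator.
  std::function<std::shared_ptr<BestFitChunk>()> Creator() {
    return [this] { return NewBestFitChunk(); };
  }

 private:
  std::size_t max_chunk_size_;
  RawAllocator raw_;
  std::vector<std::unique_ptr<Allocation>> chunks_;  // keeps chunks alive
};

In the real class the creator lambda is passed to AutoIncrementAllocator's constructor, which owns the grow-on-demand policy shown in the new files below.
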
diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h
index d780fb6e64b7ed2fd6f46aa2eb5b234447f998fd..a910e40badb80476ecd3d7761f44fc1d79b73982 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.h
+++ b/paddle/fluid/memory/allocation/allocator_facade.h
@@ -21,6 +21,9 @@ namespace paddle {
 namespace memory {
 namespace allocation {
 
+// Allocator Facade is the interface exposed to other modules.
+// All the configuration or dirty code under development should
+// be hidden behind this facade.
 class AllocatorFacadePrivate;
 class AllocatorFacade {
  public:
diff --git a/paddle/fluid/memory/allocation/auto_increment_allocator.cc b/paddle/fluid/memory/allocation/auto_increment_allocator.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1fac71b8321796417ac57f802e256c8b0df976b7
--- /dev/null
+++ b/paddle/fluid/memory/allocation/auto_increment_allocator.cc
@@ -0,0 +1,39 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+std::unique_ptr<Allocation> AutoIncrementAllocator::Allocate(
+    size_t size, Allocator::Attr attr) {
+  return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) {
+    return allocator.Allocate(size, attr);
+  });
+}
+
+std::shared_ptr<Allocation> AutoIncrementAllocator::AllocateShared(
+    size_t size, Allocator::Attr attr) {
+  return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) {
+    return allocator.AllocateShared(size, attr);
+  });
+}
+
+bool AutoIncrementAllocator::IsAllocThreadSafe() const { return true; }
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
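
Both entry points in auto_increment_allocator.cc funnel through InvokeOrCreateUnderlyingAllocator (defined in the header below), passing the actual operation as a lambda. Because the callback is a template parameter rather than a std::function, the call can be inlined, and std::result_of lets one helper return std::unique_ptr<Allocation> for Allocate and std::shared_ptr<Allocation> for AllocateShared. A self-contained sketch of that deduction pattern, with toy names (Pool, InvokeOnPool) that are not Paddle APIs:

// Toy illustration of the std::result_of dispatch; Pool and InvokeOnPool
// are hypothetical names, not Paddle APIs.
#include <iostream>
#include <memory>
#include <type_traits>

struct Pool {
  std::unique_ptr<int> MakeUnique() { return std::unique_ptr<int>(new int(1)); }
  std::shared_ptr<int> MakeShared() { return std::make_shared<int>(2); }
};

// One helper serves both return types: the callback's result type is
// deduced, so each caller gets back exactly what its lambda returns.
template <typename Callback>
typename std::result_of<Callback(Pool&)>::type InvokeOnPool(Pool& pool,
                                                            Callback cb) {
  return cb(pool);
}

int main() {
  Pool pool;
  auto u = InvokeOnPool(pool, [](Pool& p) { return p.MakeUnique(); });
  auto s = InvokeOnPool(pool, [](Pool& p) { return p.MakeShared(); });
  std::cout << *u << " " << *s << "\n";  // prints "1 2"
}
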
+ +#include "paddle/fluid/memory/allocation/auto_increment_allocator.h" + +namespace paddle { +namespace memory { +namespace allocation { + +std::unique_ptr AutoIncrementAllocator::Allocate( + size_t size, Allocator::Attr attr) { + return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) { + return allocator.Allocate(size, attr); + }); +} + +std::shared_ptr AutoIncrementAllocator::AllocateShared( + size_t size, Allocator::Attr attr) { + return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) { + return allocator.AllocateShared(size, attr); + }); +} + +bool AutoIncrementAllocator::IsAllocThreadSafe() const { return true; } + +} // namespace allocation +} // namespace memory +} // namespace paddle diff --git a/paddle/fluid/memory/allocation/auto_increment_allocator.h b/paddle/fluid/memory/allocation/auto_increment_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..9fe370b08a79ff6bf9e359ffa78b45e02ce8c89b --- /dev/null +++ b/paddle/fluid/memory/allocation/auto_increment_allocator.h @@ -0,0 +1,79 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include // NOLINT +#include +#include "paddle/fluid/memory/allocation/allocator.h" + +namespace paddle { +namespace memory { +namespace allocation { + +class AutoIncrementAllocator : public ManagedAllocator { + public: + using AllocatorCreator = std::function()>; + + template + explicit AutoIncrementAllocator(Creator&& creator) + : creator_(std::move(creator)), prev_success_allocator_{0} {} + std::unique_ptr Allocate(size_t size, Attr attr) override; + std::shared_ptr AllocateShared(size_t size, Attr attr) override; + bool IsAllocThreadSafe() const override; + + private: + // NOTE: here use template Callback, it can be inlined when -O3 + template + inline typename std::result_of::type + InvokeOrCreateUnderlyingAllocator(Callback callback) { + size_t retry_count = underlying_allocators_.size(); + auto cur = prev_success_allocator_; + while (retry_count-- > 0) { // until there retry count is zero + try { + auto res = callback(*underlying_allocators_[cur]); + { + std::lock_guard guard(mtx_); + prev_success_allocator_ = cur; + } + return std::move(res); + } catch (BadAlloc&) { + ++cur; + if (cur >= underlying_allocators_.size()) { + cur = 0; + } + } catch (...) { + // if there is another type of allocation, just rethrow it. + throw; + } + } + // No suitable allocator + { + std::lock_guard guard(mtx_); + underlying_allocators_.emplace_back(creator_()); + prev_success_allocator_ = underlying_allocators_.size() - 1; + return callback(*underlying_allocators_[prev_success_allocator_]); + } + } + + AllocatorCreator creator_; + std::vector underlying_allocators_; + size_t prev_success_allocator_{0}; + std::mutex mtx_; // NOLINT +}; +} // namespace allocation +} // namespace memory +} // namespace paddle