// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/allocator.h"
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
#endif

DEFINE_int32(
    gpu_allocator_retry_time, 0,
    "The retry time (milliseconds) when allocator fails "
    "to allocate memory. No retry if this value is not greater than 0");

namespace paddle {
namespace memory {
namespace allocation {

// TODO(yy): Dirty code here. This class should be configurable in runtime.
class CPUManagedAllocator : public ManagedAllocator {
 public:
  CPUManagedAllocator()
      : normal_allocator_(NaiveManagedAllocator::Create(
            std::unique_ptr<Allocator>(new CPUAllocator()))),
        communication_allocator_(NaiveManagedAllocator::Create(
            std::unique_ptr<Allocator>(new CPUPinnedAllocator()))) {}

  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
    if (attr == kCrossDevice) {
      return communication_allocator_->Allocate(size, attr);
    } else {
      return normal_allocator_->Allocate(size, attr);
    }
  }

  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
    if (attr == kCrossDevice) {
      return communication_allocator_->AllocateShared(size, attr);
    } else {
      return normal_allocator_->AllocateShared(size, attr);
    }
  }

  bool IsAllocThreadSafe() const override { return true; }

 private:
  std::shared_ptr<ManagedAllocator> normal_allocator_;
  std::shared_ptr<ManagedAllocator> communication_allocator_;
};

#ifdef PADDLE_WITH_CUDA
// TODO(yy): Dirty code here. This class should be configurable in runtime.
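// CUDAManagedAllocator composes several allocators for a single GPU:
//  - raw_allocator_ allocates directly from the device via CUDAAllocator;
//  - BestFitAllocatorCreator() carves a max_chunk_size_ chunk out of
//    raw_allocator_ and manages it with a locked BestFitAllocator, wrapped
//    in an AlignedAllocator and, when FLAGS_gpu_allocator_retry_time > 0,
//    a RetryAllocator;
//  - an AutoIncrementAllocator grows the set of chunks on demand when more
//    than one chunk fits in the currently available device memory;
//  - a ConditionalAllocator routes requests smaller than max_chunk_size_ to
//    the chunked path and everything else to raw_allocator_.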
class CUDAManagedAllocator : public ManagedAllocator {
 public:
  explicit CUDAManagedAllocator(int dev_id) {
    platform::CUDADeviceGuard guard(dev_id);
    max_chunk_size_ = platform::GpuMaxChunkSize();
    raw_allocator_ = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
        new CUDAAllocator(platform::CUDAPlace(dev_id))));

    if (max_chunk_size_ == 0) {
      default_allocator_ = raw_allocator_;
    } else {
      size_t available, total;
      platform::GpuMemoryUsage(&available, &total);
      size_t capacity = available / max_chunk_size_;

      if (capacity == 1) {
        VLOG(10) << "Create BestFitAllocator with chunk_size "
                 << max_chunk_size_;
        default_allocator_ = BestFitAllocatorCreator();
      } else {
        VLOG(10) << "Create AutoIncrementAllocator with chunk_size "
                 << max_chunk_size_ << " and capacity " << capacity;
        default_allocator_ = std::make_shared<AutoIncrementAllocator>(
            [this] { return std::move(BestFitAllocatorCreator()); }, capacity);
      }
    }

    auto* cond_allocator = new ConditionalAllocator();
    cond_allocator
        ->AddAllocator(
            [this](size_t size, Attr attr) { return size < max_chunk_size_; },
            default_allocator_)
        .AddAllocator(
            [](size_t size, Attr attr) {
              return true;  // default case
            },
            raw_allocator_);
    default_allocator_.reset(cond_allocator);
  }

  ~CUDAManagedAllocator() {
    // Specify destruct order.
    default_allocator_.reset();
    chunks_.clear();
    raw_allocator_.reset();
  }

  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
    return default_allocator_->Allocate(size, attr);
  }

  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
    return default_allocator_->AllocateShared(size, attr);
  }

  std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
    chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
    auto* allocation = chunks_.back().get();
    std::unique_ptr<Allocator> unmanaged_allocator(new LockedAllocator(
        std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));

    if (FLAGS_gpu_allocator_retry_time <= 0) {
      VLOG(10) << "Create NaiveManagedAllocator without retry";
      return std::make_shared<AlignedAllocator<64>>(
          NaiveManagedAllocator::Create(std::move(unmanaged_allocator)));
    } else {
      VLOG(10) << "Create RetryAllocator with retry_time "
               << FLAGS_gpu_allocator_retry_time << "ms";
      return std::make_shared<AlignedAllocator<64>>(RetryAllocator::Create(
          std::move(unmanaged_allocator),
          static_cast<size_t>(FLAGS_gpu_allocator_retry_time)));
    }
  }

  bool IsAllocThreadSafe() const override { return true; }

 private:
  size_t max_chunk_size_;
  std::vector<std::unique_ptr<Allocation>> chunks_;
  std::shared_ptr<ManagedAllocator> raw_allocator_;
  std::shared_ptr<ManagedAllocator> default_allocator_;
};
#endif

class AllocatorFacadePrivate {
 public:
  std::unordered_map<platform::Place, std::shared_ptr<ManagedAllocator>>
      allocators_;

  ~AllocatorFacadePrivate() = default;

  AllocatorFacadePrivate() {
    InitCPUAllocator();
    InitCUDAAllocator();
    WrapZeroSizeAllocator();
  }

 private:
  void InitCPUAllocator() {
    allocators_[platform::CPUPlace()] =
        std::make_shared<CPUManagedAllocator>();
  }

  void InitCUDAAllocator() {
#ifdef PADDLE_WITH_CUDA
    for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) {
      allocators_[platform::CUDAPlace(dev_id)] =
          std::make_shared<CUDAManagedAllocator>(dev_id);
    }
#endif
  }

  void WrapZeroSizeAllocator() {
    for (auto& pair : allocators_) {
      pair.second =
          std::make_shared<ZeroSizeAllocator>(pair.second, pair.first);
    }
  }
};

// Pimpl. Make interface clean.
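// AllocatorFacade is a process-wide singleton (see Instance()); every public
// call looks up the ManagedAllocator registered for the requested Place in
// AllocatorFacadePrivate::allocators_ and forwards the request to it.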
AllocatorFacade::AllocatorFacade() : m_(new AllocatorFacadePrivate()) {}
AllocatorFacade::~AllocatorFacade() { delete m_; }

AllocatorFacade& AllocatorFacade::Instance() {
  static AllocatorFacade instance;
  return instance;
}

std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
    const platform::Place& place, size_t size, Allocator::Attr attr) {
  return m_->allocators_.at(place)->AllocateShared(size, attr);
}

std::unique_ptr<Allocation> AllocatorFacade::Alloc(
    const platform::Place& place, size_t size, Allocator::Attr attr) {
  return m_->allocators_.at(place)->Allocate(size, attr);
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
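// Usage sketch (illustrative only, not part of this translation unit):
// callers go through the facade rather than the concrete allocators. The
// Attr value shown below is an assumption; the actual default argument is
// declared in allocator_facade.h.
//
//   auto holder = paddle::memory::allocation::AllocatorFacade::Instance()
//                     .AllocShared(platform::CPUPlace(), /* size */ 1024,
//                                  Allocator::kDefault);
//   void* data = holder->ptr();  // memory released when `holder` is dropped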