Commit 311b8f2f authored by Yu Yang

Refine Allocator facade

Parent 2f16f47e
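This extracts the per-device CUDA setup that previously lived inline in AllocatorFacadePrivate::InitCUDAAllocator into a dedicated CUDAManagedAllocator, and introduces AutoIncrementAllocator, which round-robins over a growing list of underlying allocators and asks a creator callback for a new one when every existing allocator is exhausted. GPU memory is now carved into max-chunk-size pieces, each managed by its own BestFitAllocator, created on demand.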
paddle/fluid/memory/allocation/CMakeLists.txt
@@ -33,7 +33,7 @@ else ()
endif()
cc_library(aligned_allocator SRCS aligned_allocator.cc DEPS allocator)
cc_library(auto_increment_allocator SRCS auto_increment_allocator.cc DEPS allocator)
cc_library(allocator_facade SRCS allocator_facade.cc DEPS
${AllocatorFacadeDeps}
cpu_allocator
@@ -41,6 +41,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
best_fit_allocator
naive_managed_allocator
aligned_allocator
auto_increment_allocator
cuda_device_guard)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
paddle/fluid/memory/allocation/allocator_facade.cc
@@ -17,6 +17,7 @@
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
#include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
@@ -33,6 +34,7 @@ namespace paddle {
namespace memory {
namespace allocation {
// TODO(yy): Dirty code here. This class should be configurable at runtime.
class CPUManagedAllocator : public ManagedAllocator {
public:
CPUManagedAllocator()
@@ -56,24 +58,59 @@ class CPUManagedAllocator : public ManagedAllocator {
return normal_allocator_->AllocateShared(size, attr);
}
}
bool IsAllocThreadSafe() const override { return true; }
private:
std::shared_ptr<ManagedAllocator> normal_allocator_;
std::shared_ptr<ManagedAllocator> communication_allocator_;
};
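The truncated method above dispatches each request between normal_allocator_ and communication_allocator_ based on the request attribute. A minimal self-contained sketch of that dispatch pattern; the types and attribute names below are simplified stand-ins, not Paddle's actual interfaces:

#include <cstdlib>
#include <memory>

// Simplified stand-ins for Paddle's Allocation/ManagedAllocator types.
enum class Attr { kDefault, kCommunication };

struct Allocation {
  void* ptr;
  std::size_t size;
};

class Allocator {
 public:
  virtual ~Allocator() = default;
  virtual std::unique_ptr<Allocation> Allocate(std::size_t size, Attr attr) = 0;
};

class MallocAllocator : public Allocator {
 public:
  std::unique_ptr<Allocation> Allocate(std::size_t size, Attr) override {
    // Deallocation is elided for brevity.
    return std::unique_ptr<Allocation>(new Allocation{std::malloc(size), size});
  }
};

// Mirrors CPUManagedAllocator's shape: communication requests go to one
// underlying allocator (pinned memory in Paddle), everything else to another.
class DispatchingAllocator : public Allocator {
 public:
  DispatchingAllocator(std::unique_ptr<Allocator> normal,
                       std::unique_ptr<Allocator> comm)
      : normal_(std::move(normal)), comm_(std::move(comm)) {}

  std::unique_ptr<Allocation> Allocate(std::size_t size, Attr attr) override {
    return attr == Attr::kCommunication ? comm_->Allocate(size, attr)
                                        : normal_->Allocate(size, attr);
  }

 private:
  std::unique_ptr<Allocator> normal_;
  std::unique_ptr<Allocator> comm_;
};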
class AllocatorFacadePrivate {
// TODO(yy): Dirty code here. This class should be configurable at runtime.
class CUDAManagedAllocator : public ManagedAllocator {
public:
std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
std::vector<std::unique_ptr<Allocation>> pre_allocations_;
std::vector<std::shared_ptr<Allocator>> holding_allocators_;
explicit CUDAManagedAllocator(int dev_id) {
platform::CUDADeviceGuard guard(dev_id);
max_chunk_size_ = platform::GpuMaxChunkSize();
raw_allocator_ = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
new CUDAAllocator(platform::CUDAPlace(dev_id))));
default_allocator_ = std::make_shared<AutoIncrementAllocator>(
[this] { return std::move(BestFitAllocatorCreator()); });
}
~AllocatorFacadePrivate() {
~CUDAManagedAllocator() {
// Specify destruct order.
pre_allocations_.clear();
allocators_.clear();
holding_allocators_.clear();
default_allocator_.reset();
chunks_.clear();
raw_allocator_.reset();
}
std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
return default_allocator_->Allocate(size, attr);
}
std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
return default_allocator_->AllocateShared(size, attr);
}
std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
auto* allocation = chunks_.back().get();
return NaiveManagedAllocator::Create(
std::unique_ptr<Allocator>(new BestFitAllocator(allocation)));
}
bool IsAllocThreadSafe() const override { return true; }
private:
size_t max_chunk_size_;
std::vector<std::unique_ptr<Allocation>> chunks_;
std::shared_ptr<ManagedAllocator> raw_allocator_;
std::shared_ptr<ManagedAllocator> default_allocator_;
};
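BestFitAllocatorCreator above is the growth hook: each call carves one max_chunk_size_ chunk out of the raw CUDA allocator and wraps it in a fresh BestFitAllocator, and the AutoIncrementAllocator built in the constructor invokes it whenever every existing chunk is exhausted. A minimal self-contained sketch of this grow-on-demand pattern, using malloc and a trivial bump allocator in place of CUDA and best-fit (all names below are illustrative):

#include <cstdlib>
#include <memory>
#include <vector>

// A chunk of backing memory, standing in for one raw CUDA allocation.
struct Chunk {
  explicit Chunk(std::size_t cap) : base(std::malloc(cap)), capacity(cap) {}
  ~Chunk() { std::free(base); }
  void* base;
  std::size_t capacity;
  std::size_t used = 0;
};

// Trivial bump allocator over one chunk, standing in for BestFitAllocator.
class BumpAllocator {
 public:
  explicit BumpAllocator(Chunk* chunk) : chunk_(chunk) {}
  // Returns nullptr when the chunk is exhausted (the real code throws BadAlloc).
  void* Allocate(std::size_t size) {
    if (chunk_->used + size > chunk_->capacity) return nullptr;
    void* p = static_cast<char*>(chunk_->base) + chunk_->used;
    chunk_->used += size;
    return p;
  }

 private:
  Chunk* chunk_;
};

int main() {
  const std::size_t kMaxChunkSize = 1 << 20;   // stand-in for GpuMaxChunkSize()
  std::vector<std::unique_ptr<Chunk>> chunks;  // mirrors chunks_

  // The creator callback: each call allocates one more chunk and returns an
  // allocator over it, just like BestFitAllocatorCreator().
  auto creator = [&chunks, kMaxChunkSize] {
    chunks.emplace_back(new Chunk(kMaxChunkSize));
    return std::make_shared<BumpAllocator>(chunks.back().get());
  };

  std::vector<std::shared_ptr<BumpAllocator>> allocators{creator()};
  void* p = allocators.back()->Allocate(256);
  if (p == nullptr) allocators.emplace_back(creator());  // grow on exhaustion
  return 0;
}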
class AllocatorFacadePrivate {
public:
std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
~AllocatorFacadePrivate() {}
AllocatorFacadePrivate() {
InitCPUAllocator();
@@ -88,19 +125,8 @@ class AllocatorFacadePrivate {
void InitCUDAAllocator() {
#ifdef PADDLE_WITH_CUDA
for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) {
platform::CUDADeviceGuard guard(dev_id);
auto cuda_allocator =
NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
new CUDAAllocator(platform::CUDAPlace(dev_id))));
auto allocation = cuda_allocator->Allocate(platform::GpuMaxChunkSize());
auto allocator = NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
new LockedAllocator(std::unique_ptr<Allocator>(
new BestFitAllocator(allocation.get())))));
pre_allocations_.emplace_back(std::move(allocation));
holding_allocators_.emplace_back(cuda_allocator);
allocators_[platform::CUDAPlace(dev_id)] =
std::make_shared<AlignedAllocator<64>>(std::move(allocator));
std::make_shared<CUDAManagedAllocator>(dev_id);
}
#endif
}
......
paddle/fluid/memory/allocation/allocator_facade.h
@@ -21,6 +21,9 @@ namespace paddle {
namespace memory {
namespace allocation {
// Allocator Facade is the interface exposed to other modules.
// All the configuration or dirty code under development should
// be hidden behind this facade.
class AllocatorFacadePrivate;
class AllocatorFacade {
public:
......
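For callers, everything above is reached through the facade singleton rather than any concrete allocator. A hypothetical usage sketch: Instance() and Alloc() are assumed from the elided portion of this header, and their exact signatures are not confirmed by the visible diff:

#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/platform/place.h"

// Hypothetical call site; the Alloc() signature is an assumption.
void Demo() {
  auto& facade = paddle::memory::allocation::AllocatorFacade::Instance();
  auto allocation = facade.Alloc(paddle::platform::CPUPlace(), 1024);
  // allocation owns 1 KiB of CPU memory and releases it on destruction.
}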
paddle/fluid/memory/allocation/auto_increment_allocator.cc (new file)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
std::unique_ptr<Allocation> AutoIncrementAllocator::Allocate(
size_t size, Allocator::Attr attr) {
return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) {
return allocator.Allocate(size, attr);
});
}
std::shared_ptr<Allocation> AutoIncrementAllocator::AllocateShared(
size_t size, Allocator::Attr attr) {
return InvokeOrCreateUnderlyingAllocator([&](ManagedAllocator& allocator) {
return allocator.AllocateShared(size, attr);
});
}
bool AutoIncrementAllocator::IsAllocThreadSafe() const { return true; }
} // namespace allocation
} // namespace memory
} // namespace paddle
paddle/fluid/memory/allocation/auto_increment_allocator.h (new file)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <thread> // NOLINT
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
namespace paddle {
namespace memory {
namespace allocation {
class AutoIncrementAllocator : public ManagedAllocator {
public:
using AllocatorCreator = std::function<std::shared_ptr<ManagedAllocator>()>;
template <typename Creator>
explicit AutoIncrementAllocator(Creator&& creator)
: creator_(std::move(creator)), prev_success_allocator_{0} {}
std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override;
std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
bool IsAllocThreadSafe() const override;
private:
// NOTE: Callback is a template parameter so that calls can be inlined under -O3.
template <typename Callback>
inline typename std::result_of<Callback(ManagedAllocator&)>::type
InvokeOrCreateUnderlyingAllocator(Callback callback) {
size_t retry_count = underlying_allocators_.size();
auto cur = prev_success_allocator_;
while (retry_count-- > 0) {  // retry until the retry count reaches zero
try {
auto res = callback(*underlying_allocators_[cur]);
{
std::lock_guard<std::mutex> guard(mtx_);
prev_success_allocator_ = cur;
}
return std::move(res);
} catch (BadAlloc&) {
++cur;
if (cur >= underlying_allocators_.size()) {
cur = 0;
}
} catch (...) {
// if any other type of exception is thrown, just rethrow it.
throw;
}
}
// No suitable allocator
{
std::lock_guard<std::mutex> guard(mtx_);
underlying_allocators_.emplace_back(creator_());
prev_success_allocator_ = underlying_allocators_.size() - 1;
return callback(*underlying_allocators_[prev_success_allocator_]);
}
}
AllocatorCreator creator_;
std::vector<AllocatorCreator::result_type> underlying_allocators_;
size_t prev_success_allocator_{0};
std::mutex mtx_; // NOLINT
};
} // namespace allocation
} // namespace memory
} // namespace paddle
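InvokeOrCreateUnderlyingAllocator is the heart of this header: start from the most recently successful allocator, walk the list round-robin on BadAlloc, and only invoke creator_ to append a new allocator once every existing one has failed. A condensed, single-threaded sketch of that control flow (simplified stand-in types; the real class additionally guards prev_success_allocator_ with a mutex):

#include <cstddef>
#include <functional>
#include <memory>
#include <stdexcept>
#include <vector>

struct BadAlloc : std::runtime_error {
  using std::runtime_error::runtime_error;
};

// Stand-in allocator that fails after serving a fixed number of requests.
struct FiniteAllocator {
  int remaining;
  int Allocate() {
    if (remaining <= 0) throw BadAlloc("exhausted");
    return --remaining;  // dummy "allocation"
  }
};

class AutoIncrement {
 public:
  explicit AutoIncrement(std::function<std::shared_ptr<FiniteAllocator>()> c)
      : creator_(std::move(c)) {}

  int Allocate() {
    std::size_t retry_count = allocators_.size();
    std::size_t cur = prev_success_;
    while (retry_count-- > 0) {  // round-robin over existing allocators
      try {
        int r = allocators_[cur]->Allocate();
        prev_success_ = cur;  // remember who served the request
        return r;
      } catch (BadAlloc&) {
        if (++cur >= allocators_.size()) cur = 0;  // wrap around
      }
    }
    // Every existing allocator failed (or none exists yet): create one more.
    allocators_.emplace_back(creator_());
    prev_success_ = allocators_.size() - 1;
    return allocators_.back()->Allocate();
  }

 private:
  std::function<std::shared_ptr<FiniteAllocator>()> creator_;
  std::vector<std::shared_ptr<FiniteAllocator>> allocators_;
  std::size_t prev_success_ = 0;
};

int main() {
  AutoIncrement alloc(
      [] { return std::make_shared<FiniteAllocator>(FiniteAllocator{2}); });
  for (int i = 0; i < 5; ++i) alloc.Allocate();  // list grows transparently
  return 0;
}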