Unverified commit 3ece61f7, authored by Zeng Jinle, committed by GitHub

Remove attribute in Allocator::Allocate (#17878)

* remove attribute in Allocator::Allocate, test=develop

* fix travis ci error, test=develop
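In short, the Allocator::Attr hint is dropped from the allocation interface and from every call site, so an allocator is asked only for a size. A minimal sketch of what a custom allocator looks like under the new interface (only Allocator, Allocation, AllocateImpl and FreeImpl come from the diff below; the MallocAllocator name and its malloc-based body are purely illustrative):

#include <cstdlib>
#include "paddle/fluid/memory/allocation/allocator.h"

namespace paddle {
namespace memory {
namespace allocation {

// Illustrative example only; not part of this patch.
class MallocAllocator : public Allocator {
 protected:
  // After this change AllocateImpl receives only the requested size;
  // the old Allocator::Attr parameter no longer exists.
  Allocation* AllocateImpl(size_t size) override {
    void* ptr = std::malloc(size);
    return new Allocation(ptr, size, platform::CPUPlace());
  }
  void FreeImpl(Allocation* allocation) override {
    std::free(allocation->ptr());
    delete allocation;
  }
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle

Call sites drop the attribute in the same way, e.g. memory::Alloc(place, temp_storage_bytes, memory::Allocator::kScratchpad) simply becomes memory::Alloc(place, temp_storage_bytes).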
Parent 33d1e565
...@@ -12,8 +12,7 @@ ...@@ -12,8 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifndef PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_ #pragma once
#define PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -126,5 +125,3 @@ class LockFreeOptimizePass : public Pass { ...@@ -126,5 +125,3 @@ class LockFreeOptimizePass : public Pass {
} // namespace ir } // namespace ir
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
#endif // PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
...@@ -81,8 +81,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, ...@@ -81,8 +81,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) { const char* var_name) {
auto x = scope->Var(var_name); auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>(); auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, tensor->mutable_data(place, proto::VarType::FP32, 1);
::paddle::memory::Allocator::kDefault, 1);
} }
void MainTest(bool convWithExistingBias) { void MainTest(bool convWithExistingBias) {
......
...@@ -110,8 +110,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, ...@@ -110,8 +110,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) { const char* var_name) {
auto x = scope->Var(var_name); auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>(); auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, tensor->mutable_data(place, proto::VarType::FP32, 1);
::paddle::memory::Allocator::kDefault, 1);
} }
void MainTest(const ProgramDesc& prog, int conv_count, int pool_count, void MainTest(const ProgramDesc& prog, int conv_count, int pool_count,
......
...@@ -102,8 +102,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place, ...@@ -102,8 +102,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
const char* var_name) { const char* var_name) {
auto x = scope->Var(var_name); auto x = scope->Var(var_name);
auto tensor = x->GetMutable<LoDTensor>(); auto tensor = x->GetMutable<LoDTensor>();
tensor->mutable_data(place, proto::VarType::FP32, tensor->mutable_data(place, proto::VarType::FP32, 1);
::paddle::memory::Allocator::kDefault, 1);
} }
void MainTest(const ProgramDesc& prog, int removed_nodes_num) { void MainTest(const ProgramDesc& prog, int removed_nodes_num) {
......
...@@ -377,12 +377,12 @@ class ExecutionContext { ...@@ -377,12 +377,12 @@ class ExecutionContext {
} }
template <typename T> template <typename T>
T& GetKernelConfig(int idx) const { T& GetKernelConfig(size_t idx) const {
PADDLE_ENFORCE( PADDLE_ENFORCE(
kernel_configs_ && kernel_configs_->size() > static_cast<size_t>(idx), kernel_configs_ && kernel_configs_->size() > static_cast<size_t>(idx),
"%s selected kernel doesn't have kernel config %lu <= %d", "%s selected kernel doesn't have kernel config %lu <= %lu",
op_.Type().c_str(), kernel_configs_->size(), idx); op_.Type().c_str(), kernel_configs_->size(), idx);
return *boost::get<std::shared_ptr<T>>(kernel_configs_->at(idx)); return *boost::get<std::shared_ptr<T>>((*kernel_configs_)[idx]);
} }
private: private:
......
...@@ -103,7 +103,7 @@ TAlgorithm AlgorithmsCache<TAlgorithm>::GetAlgorithm( ...@@ -103,7 +103,7 @@ TAlgorithm AlgorithmsCache<TAlgorithm>::GetAlgorithm(
++search_times_; ++search_times_;
return algo; return algo;
} }
TAlgorithm algo; TAlgorithm algo{};
int64_t min = static_cast<uint64_t>(INT_MAX); int64_t min = static_cast<uint64_t>(INT_MAX);
for (const auto& m : hash_) { for (const auto& m : hash_) {
if (m.first < min) { if (m.first < min) {
......
...@@ -35,7 +35,6 @@ size_t Tensor::memory_size() const { ...@@ -35,7 +35,6 @@ size_t Tensor::memory_size() const {
} }
void* Tensor::mutable_data(platform::Place place, proto::VarType::Type type, void* Tensor::mutable_data(platform::Place place, proto::VarType::Type type,
memory::Allocator::Attr attr,
size_t requested_size) { size_t requested_size) {
type_ = type; type_ = type;
PADDLE_ENFORCE_GE(numel(), 0, PADDLE_ENFORCE_GE(numel(), 0,
...@@ -50,18 +49,17 @@ void* Tensor::mutable_data(platform::Place place, proto::VarType::Type type, ...@@ -50,18 +49,17 @@ void* Tensor::mutable_data(platform::Place place, proto::VarType::Type type,
/* some versions of boost::variant don't have operator!= */ /* some versions of boost::variant don't have operator!= */
if (holder_ == nullptr || !(holder_->place() == place) || if (holder_ == nullptr || !(holder_->place() == place) ||
holder_->size() < size + offset_) { holder_->size() < size + offset_) {
holder_ = memory::AllocShared(place, size, attr); holder_ = memory::AllocShared(place, size);
offset_ = 0; offset_ = 0;
} }
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) + return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
offset_); offset_);
} }
void* Tensor::mutable_data(platform::Place place, memory::Allocator::Attr attr, void* Tensor::mutable_data(platform::Place place, size_t requested_size) {
size_t requested_size) {
PADDLE_ENFORCE(this->holder_ != nullptr, PADDLE_ENFORCE(this->holder_ != nullptr,
"Cannot invoke mutable data if current hold nothing."); "Cannot invoke mutable data if current hold nothing.");
return mutable_data(place, type_, attr, requested_size); return mutable_data(place, type_, requested_size);
} }
Tensor& Tensor::ShareDataWith(const Tensor& src) { Tensor& Tensor::ShareDataWith(const Tensor& src) {
......
...@@ -87,17 +87,12 @@ class Tensor { ...@@ -87,17 +87,12 @@ class Tensor {
* @note If not exist, then allocation. * @note If not exist, then allocation.
*/ */
template <typename T> template <typename T>
T* mutable_data(platform::Place place, T* mutable_data(platform::Place place, size_t requested_size = 0);
memory::Allocator::Attr attr = memory::Allocator::kDefault,
size_t requested_size = 0);
void* mutable_data(platform::Place place, proto::VarType::Type type, void* mutable_data(platform::Place place, proto::VarType::Type type,
memory::Allocator::Attr attr = memory::Allocator::kDefault,
size_t requested_size = 0); size_t requested_size = 0);
void* mutable_data(platform::Place place, void* mutable_data(platform::Place place, size_t requested_size = 0);
memory::Allocator::Attr attr = memory::Allocator::kDefault,
size_t requested_size = 0);
/** /**
* @brief Return a pointer to mutable memory block. * @brief Return a pointer to mutable memory block.
...@@ -109,9 +104,7 @@ class Tensor { ...@@ -109,9 +104,7 @@ class Tensor {
* @note If not exist, then allocation. * @note If not exist, then allocation.
*/ */
template <typename T> template <typename T>
T* mutable_data(DDim dims, platform::Place place, T* mutable_data(DDim dims, platform::Place place, size_t requested_size = 0);
memory::Allocator::Attr attr = memory::Allocator::kDefault,
size_t requested_size = 0);
/*! Return the dimensions of the memory block. */ /*! Return the dimensions of the memory block. */
const DDim& dims() const; const DDim& dims() const;
......
...@@ -49,20 +49,17 @@ inline T* Tensor::data() { ...@@ -49,20 +49,17 @@ inline T* Tensor::data() {
template <typename T> template <typename T>
inline T* Tensor::mutable_data(DDim dims, platform::Place place, inline T* Tensor::mutable_data(DDim dims, platform::Place place,
memory::Allocator::Attr attr,
size_t requested_size) { size_t requested_size) {
static_assert(std::is_pod<T>::value, "T must be POD"); static_assert(std::is_pod<T>::value, "T must be POD");
Resize(dims); Resize(dims);
return mutable_data<T>(place, attr, requested_size); return mutable_data<T>(place, requested_size);
} }
template <typename T> template <typename T>
inline T* Tensor::mutable_data(platform::Place place, inline T* Tensor::mutable_data(platform::Place place, size_t requested_size) {
memory::Allocator::Attr attr,
size_t requested_size) {
static_assert(std::is_pod<T>::value, "T must be POD"); static_assert(std::is_pod<T>::value, "T must be POD");
return reinterpret_cast<T*>( return reinterpret_cast<T*>(
mutable_data(place, DataTypeTrait<T>::DataType, attr, requested_size)); mutable_data(place, DataTypeTrait<T>::DataType, requested_size));
} }
inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) {
......
...@@ -89,9 +89,8 @@ class AlignedAllocator : public ThinAlignedAllocator { ...@@ -89,9 +89,8 @@ class AlignedAllocator : public ThinAlignedAllocator {
using ThinAlignedAllocator::ThinAlignedAllocator; using ThinAlignedAllocator::ThinAlignedAllocator;
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { Allocation* AllocateImpl(size_t size) override {
auto raw_allocation = auto raw_allocation = underlying_allocator_->Allocate(size + kAlignment);
underlying_allocator_->Allocate(size + kAlignment, attr);
return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size); return new AlignedAllocation<kAlignment>(std::move(raw_allocation), size);
} }
......
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
#include <functional>
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
......
...@@ -146,42 +146,8 @@ class Allocation { ...@@ -146,42 +146,8 @@ class Allocation {
}; };
// Base interface class of memory Allocator. // Base interface class of memory Allocator.
// To allocate a memory, allocator needs two parameters:
// 1. size of bytes.
// 2. Attribute of memory.
// NOTE: the attribute of memory might be ignored if the allocator does not
// care it.
class Allocator { class Allocator {
public: public:
enum Attr {
kDefault = 0, // Default attribute. Uses the fast or stablest allocation
// algorithm.
kFixedHuge = 1, // The allocation may not be freed until the program
// ends. e.g., `Parameters` and `Momentum`.
kFluxHuge = 2, // The allocation may create and freed frequently and the
// allocation is considerable huge. Like `activations`
// and gradients.
kScratchpad =
3, // The `Scratchpad` memory is allocated and freed very soon,
// usually within an operator or aux memory.
// Like CUDNN workspace, AUX memory in batch norm, etc.
//
// https://en.wikipedia.org/wiki/Scratchpad_memory
kCrossDevice =
4, // The memory used cross-device memory copy/communication.
// For example:
// 1. it can use an `pinned` memory for CPU-GPU
// communication.
// 2. it can use an `registered` memory for RDMA
// communication.
NumOfAttrs = 5 // The number of all attributes. It is used internally.
};
virtual ~Allocator() {} virtual ~Allocator() {}
class AllocationDeleter { class AllocationDeleter {
...@@ -195,8 +161,8 @@ class Allocator { ...@@ -195,8 +161,8 @@ class Allocator {
using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>; using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;
// Allocate an allocation. // Allocate an allocation.
inline AllocationPtr Allocate(size_t size, Allocator::Attr attr = kDefault) { inline AllocationPtr Allocate(size_t size) {
auto ptr = AllocateImpl(size, attr); auto ptr = AllocateImpl(size);
ptr->RegisterDecoratedAllocator(this); ptr->RegisterDecoratedAllocator(this);
return AllocationPtr(ptr); return AllocationPtr(ptr);
} }
...@@ -211,7 +177,7 @@ class Allocator { ...@@ -211,7 +177,7 @@ class Allocator {
virtual bool IsAllocThreadSafe() const; virtual bool IsAllocThreadSafe() const;
protected: protected:
virtual Allocation* AllocateImpl(size_t size, Allocator::Attr attr) = 0; virtual Allocation* AllocateImpl(size_t size) = 0;
virtual void FreeImpl(Allocation* allocation); virtual void FreeImpl(Allocation* allocation);
}; };
......
...@@ -67,8 +67,8 @@ class CPUManagedAllocator : public Allocator { ...@@ -67,8 +67,8 @@ class CPUManagedAllocator : public Allocator {
bool IsAllocThreadSafe() const override { return true; } bool IsAllocThreadSafe() const override { return true; }
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { Allocation* AllocateImpl(size_t size) override {
return normal_allocator_->Allocate(size, attr).release(); return normal_allocator_->Allocate(size).release();
} }
private: private:
...@@ -101,11 +101,10 @@ class ChunkedAllocator : public Allocator { ...@@ -101,11 +101,10 @@ class ChunkedAllocator : public Allocator {
auto* cond_allocator = new ConditionalAllocator(); auto* cond_allocator = new ConditionalAllocator();
cond_allocator cond_allocator
->AddAllocator( ->AddAllocator([this](size_t size) { return size < max_chunk_size_; },
[this](size_t size, Attr attr) { return size < max_chunk_size_; }, default_allocator_)
default_allocator_)
.AddAllocator( .AddAllocator(
[](size_t size, Attr attr) { [](size_t size) {
return true; // default case return true; // default case
}, },
raw_allocator_); raw_allocator_);
...@@ -133,8 +132,8 @@ class ChunkedAllocator : public Allocator { ...@@ -133,8 +132,8 @@ class ChunkedAllocator : public Allocator {
bool IsAllocThreadSafe() const override { return true; } bool IsAllocThreadSafe() const override { return true; }
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { Allocation* AllocateImpl(size_t size) override {
return default_allocator_->Allocate(size, attr).release(); return default_allocator_->Allocate(size).release();
} }
protected: protected:
...@@ -263,7 +262,7 @@ class AllocatorFacadePrivate { ...@@ -263,7 +262,7 @@ class AllocatorFacadePrivate {
explicit ZeroSizeAllocator(platform::Place place) : place_(place) {} explicit ZeroSizeAllocator(platform::Place place) : place_(place) {}
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override { Allocation* AllocateImpl(size_t size) override {
return new Allocation(nullptr, 0, place_); return new Allocation(nullptr, 0, place_);
} }
...@@ -304,13 +303,13 @@ AllocatorFacade& AllocatorFacade::Instance() { ...@@ -304,13 +303,13 @@ AllocatorFacade& AllocatorFacade::Instance() {
} }
std::shared_ptr<Allocation> AllocatorFacade::AllocShared( std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size, Allocator::Attr attr) { const platform::Place& place, size_t size) {
return std::shared_ptr<Allocation>(Alloc(place, size, attr)); return std::shared_ptr<Allocation>(Alloc(place, size));
} }
AllocationPtr AllocatorFacade::Alloc(const platform::Place& place, size_t size, AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
Allocator::Attr attr) { size_t size) {
return m_->GetAllocator(place, size)->Allocate(size, attr); return m_->GetAllocator(place, size)->Allocate(size);
} }
} // namespace allocation } // namespace allocation
......
...@@ -38,13 +38,11 @@ class AllocatorFacade { ...@@ -38,13 +38,11 @@ class AllocatorFacade {
static AllocatorFacade& Instance(); static AllocatorFacade& Instance();
// Allocate a shared allocation. // Allocate a shared allocation.
std::shared_ptr<Allocation> AllocShared( std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
const platform::Place& place, size_t size, size_t size);
Allocator::Attr attr = Allocator::kDefault);
// Allocate a unique allocation. // Allocate a unique allocation.
AllocationPtr Alloc(const platform::Place& place, size_t size, AllocationPtr Alloc(const platform::Place& place, size_t size);
Allocator::Attr attr = Allocator::kDefault);
// TODO(yy): Allocate a Copy-On-Write allocation? // TODO(yy): Allocate a Copy-On-Write allocation?
private: private:
......
...@@ -34,14 +34,13 @@ std::shared_ptr<Allocator> AutoIncrementAllocator::CreateNewAllocator() { ...@@ -34,14 +34,13 @@ std::shared_ptr<Allocator> AutoIncrementAllocator::CreateNewAllocator() {
"bug."); "bug.");
return underlying_allocators_[old_size]; return underlying_allocators_[old_size];
} }
Allocation *AutoIncrementAllocator::AllocateImpl(size_t size, Allocation *AutoIncrementAllocator::AllocateImpl(size_t size) {
Allocator::Attr attr) {
auto cur = prev_success_allocator_.load(); auto cur = prev_success_allocator_.load();
size_t retry_count = allocator_num_.load(); size_t retry_count = allocator_num_.load();
size_t allocator_num = retry_count; size_t allocator_num = retry_count;
while (retry_count-- > 0) { // until there retry count is zero while (retry_count-- > 0) { // until there retry count is zero
try { try {
auto res = underlying_allocators_[cur]->Allocate(size, attr); auto res = underlying_allocators_[cur]->Allocate(size);
prev_success_allocator_ = cur; prev_success_allocator_ = cur;
return res.release(); return res.release();
} catch (BadAlloc &) { } catch (BadAlloc &) {
...@@ -61,7 +60,7 @@ Allocation *AutoIncrementAllocator::AllocateImpl(size_t size, ...@@ -61,7 +60,7 @@ Allocation *AutoIncrementAllocator::AllocateImpl(size_t size,
// the newly created allocator by the first allocation request. // the newly created allocator by the first allocation request.
for (cur = allocator_num; cur < allocator_num_; ++cur) { for (cur = allocator_num; cur < allocator_num_; ++cur) {
try { try {
auto ret = underlying_allocators_[cur]->Allocate(size, attr); auto ret = underlying_allocators_[cur]->Allocate(size);
prev_success_allocator_ = cur; prev_success_allocator_ = cur;
return ret.release(); return ret.release();
} catch (BadAlloc &) { } catch (BadAlloc &) {
...@@ -70,7 +69,7 @@ Allocation *AutoIncrementAllocator::AllocateImpl(size_t size, ...@@ -70,7 +69,7 @@ Allocation *AutoIncrementAllocator::AllocateImpl(size_t size,
} }
} }
// No suitable allocator // No suitable allocator
return CreateNewAllocator()->Allocate(size, attr).release(); return CreateNewAllocator()->Allocate(size).release();
} }
} // namespace allocation } // namespace allocation
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <memory> #include <memory>
#include <mutex> // NOLINT #include <mutex> // NOLINT
#include <thread> // NOLINT #include <thread> // NOLINT
#include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
...@@ -60,7 +61,7 @@ class AutoIncrementAllocator : public Allocator { ...@@ -60,7 +61,7 @@ class AutoIncrementAllocator : public Allocator {
std::shared_ptr<Allocator> CreateNewAllocator(); std::shared_ptr<Allocator> CreateNewAllocator();
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size) override;
private: private:
AllocatorCreator creator_; AllocatorCreator creator_;
......
...@@ -140,7 +140,7 @@ void BestFitAllocator::FreeImpl(Allocation* allocation) { ...@@ -140,7 +140,7 @@ void BestFitAllocator::FreeImpl(Allocation* allocation) {
InsertFreeNode(chunk_it); InsertFreeNode(chunk_it);
delete allocation; delete allocation;
} }
Allocation* BestFitAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* BestFitAllocator::AllocateImpl(size_t size) {
auto highest_set_bit = static_cast<size_t>(HighestBitPos(size)); auto highest_set_bit = static_cast<size_t>(HighestBitPos(size));
MapIt map_it; MapIt map_it;
for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) { for (; highest_set_bit < free_chunks_.size(); ++highest_set_bit) {
......
...@@ -120,7 +120,7 @@ class BestFitAllocator : public Allocator { ...@@ -120,7 +120,7 @@ class BestFitAllocator : public Allocator {
protected: protected:
void FreeImpl(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size) override;
private: private:
Allocation* allocation_; // not owned Allocation* allocation_; // not owned
......
...@@ -13,8 +13,10 @@ ...@@ -13,8 +13,10 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
#include <memory>
#include <random> #include <random>
#include <thread> // NOLINT #include <thread> // NOLINT
#include <utility>
#include <vector> #include <vector>
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h"
...@@ -33,10 +35,10 @@ class StubAllocation : public Allocation { ...@@ -33,10 +35,10 @@ class StubAllocation : public Allocation {
TEST(BestFitAllocator, test_allocation) { TEST(BestFitAllocator, test_allocation) {
StubAllocation stub(4UL * 1024 * 1024 * 1024); StubAllocation stub(4UL * 1024 * 1024 * 1024);
BestFitAllocator allocator(&stub); BestFitAllocator allocator(&stub);
{ auto allocation = allocator.Allocate(64, allocator.kDefault); } { auto allocation = allocator.Allocate(64); }
{ {
auto allocation = allocator.Allocate(80, allocator.kDefault); auto allocation = allocator.Allocate(80);
{ {
auto best_fit_allocation = auto best_fit_allocation =
...@@ -48,10 +50,10 @@ TEST(BestFitAllocator, test_allocation) { ...@@ -48,10 +50,10 @@ TEST(BestFitAllocator, test_allocation) {
ASSERT_EQ(allocation->ptr(), nullptr); ASSERT_EQ(allocation->ptr(), nullptr);
} }
auto allocation2 = allocator.Allocate(60, allocator.kDefault); auto allocation2 = allocator.Allocate(60);
auto allocation3 = allocator.Allocate(90, allocator.kDefault); auto allocation3 = allocator.Allocate(90);
allocation2.reset(); allocation2.reset();
allocation2 = allocator.Allocate(30, allocator.kDefault); allocation2 = allocator.Allocate(30);
{ {
auto best_fit_allocation = auto best_fit_allocation =
...@@ -59,7 +61,7 @@ TEST(BestFitAllocator, test_allocation) { ...@@ -59,7 +61,7 @@ TEST(BestFitAllocator, test_allocation) {
ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80); ASSERT_EQ(best_fit_allocation->ChunkIterator()->offset_, 80);
} }
allocation2.reset(); allocation2.reset();
allocation2 = allocator.Allocate(60, allocator.kDefault); allocation2 = allocator.Allocate(60);
{ {
auto best_fit_allocation = auto best_fit_allocation =
...@@ -70,7 +72,7 @@ TEST(BestFitAllocator, test_allocation) { ...@@ -70,7 +72,7 @@ TEST(BestFitAllocator, test_allocation) {
allocation.reset(); allocation.reset();
allocation2.reset(); allocation2.reset();
allocation = allocator.Allocate(80 + 60, allocator.kDefault); allocation = allocator.Allocate(80 + 60);
{ {
auto best_fit_allocation = auto best_fit_allocation =
dynamic_cast<BestFitAllocation*>(allocation.get()); dynamic_cast<BestFitAllocation*>(allocation.get());
...@@ -79,8 +81,8 @@ TEST(BestFitAllocator, test_allocation) { ...@@ -79,8 +81,8 @@ TEST(BestFitAllocator, test_allocation) {
allocation.reset(); allocation.reset();
allocation = allocator.Allocate(80, allocator.kDefault); allocation = allocator.Allocate(80);
allocation2 = allocator.Allocate(60, allocator.kDefault); allocation2 = allocator.Allocate(60);
allocation = nullptr; allocation = nullptr;
allocation2 = nullptr; allocation2 = nullptr;
allocation3 = nullptr; allocation3 = nullptr;
...@@ -91,8 +93,7 @@ TEST(BestFitAllocator, test_allocation) { ...@@ -91,8 +93,7 @@ TEST(BestFitAllocator, test_allocation) {
TEST(BestFitAllocator, test_concurrent_cpu_allocation) { TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
CPUAllocator allocator; CPUAllocator allocator;
auto global_allocation = auto global_allocation = allocator.Allocate(256UL * 1024 * 1024);
allocator.Allocate(256UL * 1024 * 1024, allocator.kDefault);
std::unique_ptr<Allocator> best_fit_allocator( std::unique_ptr<Allocator> best_fit_allocator(
new BestFitAllocator(global_allocation.get())); new BestFitAllocator(global_allocation.get()));
...@@ -106,8 +107,8 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) { ...@@ -106,8 +107,8 @@ TEST(BestFitAllocator, test_concurrent_cpu_allocation) {
for (size_t i = 0; i < 128; ++i) { for (size_t i = 0; i < 128; ++i) {
size_t allocate_size = dist(engine); size_t allocate_size = dist(engine);
auto allocation = locked_allocator.Allocate( auto allocation =
sizeof(size_t) * allocate_size, locked_allocator.kDefault); locked_allocator.Allocate(sizeof(size_t) * allocate_size);
size_t* data = reinterpret_cast<size_t*>(allocation->ptr()); size_t* data = reinterpret_cast<size_t*>(allocation->ptr());
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <memory>
#include <random> #include <random>
#include <thread> // NOLINT #include <thread> // NOLINT
#include <vector> #include <vector>
...@@ -36,8 +37,7 @@ struct ForEachFill { ...@@ -36,8 +37,7 @@ struct ForEachFill {
TEST(BestFitAllocator, concurrent_cuda) { TEST(BestFitAllocator, concurrent_cuda) {
CUDAAllocator allocator(platform::CUDAPlace(0)); CUDAAllocator allocator(platform::CUDAPlace(0));
// 256 MB // 256 MB
auto cuda_allocation = auto cuda_allocation = allocator.Allocate(256U * 1024 * 1024);
allocator.Allocate(256U * 1024 * 1024, allocator.kDefault);
LockedAllocator concurrent_allocator( LockedAllocator concurrent_allocator(
std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get()))); std::unique_ptr<Allocator>(new BestFitAllocator(cuda_allocation.get())));
...@@ -50,8 +50,8 @@ TEST(BestFitAllocator, concurrent_cuda) { ...@@ -50,8 +50,8 @@ TEST(BestFitAllocator, concurrent_cuda) {
for (size_t i = 0; i < 128; ++i) { for (size_t i = 0; i < 128; ++i) {
size_t allocate_size = dist(engine); size_t allocate_size = dist(engine);
auto allocation = concurrent_allocator.Allocate( auto allocation =
sizeof(size_t) * allocate_size, concurrent_allocator.kDefault); concurrent_allocator.Allocate(sizeof(size_t) * allocate_size);
size_t* data = reinterpret_cast<size_t*>(allocation->ptr()); size_t* data = reinterpret_cast<size_t*>(allocation->ptr());
......
...@@ -53,7 +53,7 @@ void BufferedAllocator::FreeImpl(Allocation *allocation) { ...@@ -53,7 +53,7 @@ void BufferedAllocator::FreeImpl(Allocation *allocation) {
allocations_.emplace(allocation->size(), AllocationPtr(allocation)); allocations_.emplace(allocation->size(), AllocationPtr(allocation));
} }
Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *BufferedAllocator::AllocateImpl(size_t size) {
{ {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
auto it = allocations_.lower_bound(size); auto it = allocations_.lower_bound(size);
...@@ -65,10 +65,10 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { ...@@ -65,10 +65,10 @@ Allocation *BufferedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
} }
try { try {
return underlying_allocator_->Allocate(size, attr).release(); return underlying_allocator_->Allocate(size).release();
} catch (BadAlloc &) { } catch (BadAlloc &) {
FreeCache(size); FreeCache(size);
return underlying_allocator_->Allocate(size, attr).release(); return underlying_allocator_->Allocate(size).release();
} }
} }
......
...@@ -45,7 +45,7 @@ class BufferedAllocator : public Allocator { ...@@ -45,7 +45,7 @@ class BufferedAllocator : public Allocator {
protected: protected:
void FreeImpl(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
......
...@@ -36,7 +36,7 @@ inline std::unique_ptr<BufferedAllocator> GetBufferedAllocator( ...@@ -36,7 +36,7 @@ inline std::unique_ptr<BufferedAllocator> GetBufferedAllocator(
TEST(buffered_allocator, thread_safety) { TEST(buffered_allocator, thread_safety) {
std::unique_ptr<CPUAllocator> allocator(new CPUAllocator()); std::unique_ptr<CPUAllocator> allocator(new CPUAllocator());
auto chunk = allocator->Allocate(1 << 20, allocator->kDefault); auto chunk = allocator->Allocate(1 << 20);
{ {
auto buf_allocator = GetBufferedAllocator(chunk.get(), true); auto buf_allocator = GetBufferedAllocator(chunk.get(), true);
ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true); ASSERT_EQ(buf_allocator->IsAllocThreadSafe(), true);
...@@ -72,7 +72,7 @@ class StubAllocator : public Allocator { ...@@ -72,7 +72,7 @@ class StubAllocator : public Allocator {
++destruct_count_; ++destruct_count_;
delete allocation; delete allocation;
} }
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override { Allocation *AllocateImpl(size_t size) override {
++construct_count_; ++construct_count_;
if (size == 0) { if (size == 0) {
return new StubAllocation(nullptr, 0, platform::CPUPlace()); return new StubAllocation(nullptr, 0, platform::CPUPlace());
...@@ -98,7 +98,7 @@ TEST(buffered_allocator, lazy_free) { ...@@ -98,7 +98,7 @@ TEST(buffered_allocator, lazy_free) {
{ {
underlying_allocator->ResetCounter(); underlying_allocator->ResetCounter();
auto x = allocator->Allocate(1025, allocator->kDefault); auto x = allocator->Allocate(1025);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne); ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
x = nullptr; x = nullptr;
...@@ -107,10 +107,10 @@ TEST(buffered_allocator, lazy_free) { ...@@ -107,10 +107,10 @@ TEST(buffered_allocator, lazy_free) {
{ {
underlying_allocator->ResetCounter(); underlying_allocator->ResetCounter();
auto x = allocator->Allocate(900, allocator->kDefault); auto x = allocator->Allocate(900);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero); ASSERT_EQ(underlying_allocator->GetAllocCount(), kZero);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
auto y = allocator->Allocate(2048, allocator->kDefault); auto y = allocator->Allocate(2048);
ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne); ASSERT_EQ(underlying_allocator->GetAllocCount(), kOne);
ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero); ASSERT_EQ(underlying_allocator->GetFreeCount(), kZero);
x = nullptr; x = nullptr;
...@@ -129,13 +129,13 @@ TEST(buffered_allocator, lazy_free) { ...@@ -129,13 +129,13 @@ TEST(buffered_allocator, lazy_free) {
TEST(buffered_allocator, garbage_collection) { TEST(buffered_allocator, garbage_collection) {
std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator()); std::unique_ptr<CPUAllocator> cpu_allocator(new CPUAllocator());
auto chunk = cpu_allocator->Allocate(2048, cpu_allocator->kDefault); auto chunk = cpu_allocator->Allocate(2048);
auto allocator = GetBufferedAllocator(chunk.get(), false); auto allocator = GetBufferedAllocator(chunk.get(), false);
auto x1 = allocator->Allocate(1600, allocator->kDefault); auto x1 = allocator->Allocate(1600);
auto x2 = allocator->Allocate(400, allocator->kDefault); auto x2 = allocator->Allocate(400);
x1 = nullptr; x1 = nullptr;
x2 = nullptr; x2 = nullptr;
auto x3 = allocator->Allocate(1600, allocator->kDefault); auto x3 = allocator->Allocate(1600);
ASSERT_NE(x3, nullptr); ASSERT_NE(x3, nullptr);
ASSERT_NE(x3->ptr(), nullptr); ASSERT_NE(x3->ptr(), nullptr);
} }
......
...@@ -13,14 +13,14 @@ ...@@ -13,14 +13,14 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/memory/allocation/conditional_allocator.h" #include "paddle/fluid/memory/allocation/conditional_allocator.h"
#include <memory>
namespace paddle { namespace paddle {
namespace memory { namespace memory {
namespace allocation { namespace allocation {
ConditionalAllocator& ConditionalAllocator::AddAllocator( ConditionalAllocator& ConditionalAllocator::AddAllocator(
std::function<bool(size_t, Allocator::Attr)> func, std::function<bool(size_t)> func, std::shared_ptr<Allocator> allocator) {
std::shared_ptr<Allocator> allocator) {
underlying_allocators_.emplace_back(std::move(func), std::move(allocator)); underlying_allocators_.emplace_back(std::move(func), std::move(allocator));
return *this; return *this;
} }
...@@ -33,11 +33,10 @@ bool ConditionalAllocator::IsAllocThreadSafe() const { ...@@ -33,11 +33,10 @@ bool ConditionalAllocator::IsAllocThreadSafe() const {
}); });
} }
Allocation* ConditionalAllocator::AllocateImpl(size_t size, Allocation* ConditionalAllocator::AllocateImpl(size_t size) {
Allocator::Attr attr) {
for (auto& pair : underlying_allocators_) { for (auto& pair : underlying_allocators_) {
if (pair.first(size, attr)) { if (pair.first(size)) {
return pair.second->Allocate(size, attr).release(); return pair.second->Allocate(size).release();
} }
} }
throw BadAlloc("No suitable allocator"); throw BadAlloc("No suitable allocator");
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include <functional> #include <functional>
#include <memory>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/allocator.h"
...@@ -28,13 +29,10 @@ namespace allocation { ...@@ -28,13 +29,10 @@ namespace allocation {
// For example: // For example:
// //
// auto* cond_allocator = new ConditionalAllocator(); // auto* cond_allocator = new ConditionalAllocator();
// cond_allocator->AddAllocator([](size_t size, Attr attr){ // cond_allocator->AddAllocator([](size_t size){
// // if size > 10 // // if size > 10
// return size > 10; // return size > 10;
// }, allocator_a).AddAllocator([](size_t size, Attr attr){ // }, allocator_b).AddAllocator([](size_t size){
// // elif attr is kDefault
// return attr == kDefault;
// }, allocator_b).AddAllocator([](size_t size, Attr attr){
// // else // // else
// return true; // return true;
// }, allocator_c); // }, allocator_c);
...@@ -42,17 +40,17 @@ class ConditionalAllocator : public Allocator { ...@@ -42,17 +40,17 @@ class ConditionalAllocator : public Allocator {
public: public:
ConditionalAllocator() = default; ConditionalAllocator() = default;
ConditionalAllocator& AddAllocator(std::function<bool(size_t, Attr)> func, ConditionalAllocator& AddAllocator(std::function<bool(size_t)> func,
std::shared_ptr<Allocator> allocator); std::shared_ptr<Allocator> allocator);
bool IsAllocThreadSafe() const override; bool IsAllocThreadSafe() const override;
protected: protected:
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size) override;
private: private:
using AllocatorWithCond = using AllocatorWithCond =
std::pair<std::function<bool(size_t, Attr)>, std::shared_ptr<Allocator>>; std::pair<std::function<bool(size_t)>, std::shared_ptr<Allocator>>;
std::vector<AllocatorWithCond> underlying_allocators_; std::vector<AllocatorWithCond> underlying_allocators_;
}; };
......
...@@ -32,7 +32,7 @@ void CPUAllocator::FreeImpl(Allocation *allocation) { ...@@ -32,7 +32,7 @@ void CPUAllocator::FreeImpl(Allocation *allocation) {
delete allocation; delete allocation;
} }
Allocation *CPUAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *CPUAllocator::AllocateImpl(size_t size) {
void *p; void *p;
#ifdef _WIN32 #ifdef _WIN32
p = _aligned_malloc(size, kAlignment); p = _aligned_malloc(size, kAlignment);
......
...@@ -38,7 +38,7 @@ class CPUAllocator : public Allocator { ...@@ -38,7 +38,7 @@ class CPUAllocator : public Allocator {
protected: protected:
void FreeImpl(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size) override;
}; };
} // namespace allocation } // namespace allocation
} // namespace memory } // namespace memory
......
...@@ -31,7 +31,7 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) { ...@@ -31,7 +31,7 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) {
delete allocation; delete allocation;
} }
Allocation* CUDAAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* CUDAAllocator::AllocateImpl(size_t size) {
platform::CUDADeviceGuard guard(place_.device); platform::CUDADeviceGuard guard(place_.device);
void* ptr; void* ptr;
auto status = cudaMalloc(&ptr, size); auto status = cudaMalloc(&ptr, size);
......
...@@ -29,7 +29,7 @@ class CUDAAllocator : public Allocator { ...@@ -29,7 +29,7 @@ class CUDAAllocator : public Allocator {
protected: protected:
void FreeImpl(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size) override;
private: private:
platform::CUDAPlace place_; platform::CUDAPlace place_;
......
...@@ -339,7 +339,7 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const { ...@@ -339,7 +339,7 @@ size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const {
namespace allocation { namespace allocation {
LegacyMemMonitor GPUMemMonitor; LegacyMemMonitor GPUMemMonitor;
Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *LegacyAllocator::AllocateImpl(size_t size) {
void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_); void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_);
auto *tmp_alloc = new Allocation(ptr, size, place_); auto *tmp_alloc = new Allocation(ptr, size, place_);
platform::MemEvenRecorder::Instance().PushMemRecord( platform::MemEvenRecorder::Instance().PushMemRecord(
......
...@@ -72,7 +72,7 @@ class LegacyAllocator : public Allocator { ...@@ -72,7 +72,7 @@ class LegacyAllocator : public Allocator {
explicit LegacyAllocator(const platform::Place &p) : place_(p) {} explicit LegacyAllocator(const platform::Place &p) : place_(p) {}
protected: protected:
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size) override;
void FreeImpl(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
private: private:
......
...@@ -37,9 +37,9 @@ void LockedAllocator::FreeImpl(Allocation *allocation) { ...@@ -37,9 +37,9 @@ void LockedAllocator::FreeImpl(Allocation *allocation) {
underlying_allocator_->Free(allocation); underlying_allocator_->Free(allocation);
} }
Allocation *LockedAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation *LockedAllocator::AllocateImpl(size_t size) {
platform::LockGuardPtr<std::mutex> guard(mtx_); platform::LockGuardPtr<std::mutex> guard(mtx_);
return underlying_allocator_->Allocate(size, attr).release(); return underlying_allocator_->Allocate(size).release();
} }
} // namespace allocation } // namespace allocation
......
...@@ -29,7 +29,7 @@ class LockedAllocator : public Allocator { ...@@ -29,7 +29,7 @@ class LockedAllocator : public Allocator {
protected: protected:
void FreeImpl(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
......
...@@ -24,8 +24,7 @@ void CPUPinnedAllocator::FreeImpl(Allocation *allocation) { ...@@ -24,8 +24,7 @@ void CPUPinnedAllocator::FreeImpl(Allocation *allocation) {
PADDLE_ENFORCE(cudaFreeHost(allocation->ptr())); PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
delete allocation; delete allocation;
} }
Allocation *CPUPinnedAllocator::AllocateImpl(size_t size, Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
Allocator::Attr attr) {
void *ptr; void *ptr;
PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable)); PADDLE_ENFORCE(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
return new Allocation(ptr, size, platform::CUDAPinnedPlace()); return new Allocation(ptr, size, platform::CUDAPinnedPlace());
......
...@@ -26,7 +26,7 @@ class CPUPinnedAllocator : public Allocator { ...@@ -26,7 +26,7 @@ class CPUPinnedAllocator : public Allocator {
protected: protected:
void FreeImpl(Allocation *allocation) override; void FreeImpl(Allocation *allocation) override;
Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation *AllocateImpl(size_t size) override;
}; };
} // namespace allocation } // namespace allocation
......
...@@ -23,9 +23,9 @@ void RetryAllocator::FreeImpl(Allocation* allocation) { ...@@ -23,9 +23,9 @@ void RetryAllocator::FreeImpl(Allocation* allocation) {
cv_.notify_all(); cv_.notify_all();
} }
Allocation* RetryAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { Allocation* RetryAllocator::AllocateImpl(size_t size) {
auto alloc_func = [&, this]() { auto alloc_func = [&, this]() {
return underlying_allocator_->Allocate(size, attr).release(); return underlying_allocator_->Allocate(size).release();
}; };
// In fact, we can unify the code of allocation success and failure // In fact, we can unify the code of allocation success and failure
// But it would add lock even when allocation success at the first time // But it would add lock even when allocation success at the first time
......
...@@ -40,7 +40,7 @@ class RetryAllocator : public Allocator { ...@@ -40,7 +40,7 @@ class RetryAllocator : public Allocator {
protected: protected:
void FreeImpl(Allocation* allocation) override; void FreeImpl(Allocation* allocation) override;
Allocation* AllocateImpl(size_t size, Allocator::Attr attr) override; Allocation* AllocateImpl(size_t size) override;
private: private:
std::shared_ptr<Allocator> underlying_allocator_; std::shared_ptr<Allocator> underlying_allocator_;
......
...@@ -32,7 +32,7 @@ TEST(RetryAllocator, RetryAllocator) { ...@@ -32,7 +32,7 @@ TEST(RetryAllocator, RetryAllocator) {
CPUAllocator cpu_allocator; CPUAllocator cpu_allocator;
size_t size = (1 << 20); size_t size = (1 << 20);
auto cpu_allocation = cpu_allocator.Allocate(size, cpu_allocator.kDefault); auto cpu_allocation = cpu_allocator.Allocate(size);
std::unique_ptr<BestFitAllocator> best_fit_allocator( std::unique_ptr<BestFitAllocator> best_fit_allocator(
new BestFitAllocator(cpu_allocation.get())); new BestFitAllocator(cpu_allocation.get()));
......
...@@ -21,13 +21,12 @@ limitations under the License. */ ...@@ -21,13 +21,12 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace memory { namespace memory {
std::shared_ptr<Allocation> AllocShared(const platform::Place& place, std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
size_t size, Allocator::Attr attr) { size_t size) {
return allocation::AllocatorFacade::Instance().AllocShared(place, size, attr); return allocation::AllocatorFacade::Instance().AllocShared(place, size);
} }
AllocationPtr Alloc(const platform::Place& place, size_t size, AllocationPtr Alloc(const platform::Place& place, size_t size) {
Allocator::Attr attr) { return allocation::AllocatorFacade::Instance().Alloc(place, size);
return allocation::AllocatorFacade::Instance().Alloc(place, size, attr);
} }
} // namespace memory } // namespace memory
......
...@@ -23,12 +23,10 @@ using allocation::Allocation; ...@@ -23,12 +23,10 @@ using allocation::Allocation;
using allocation::Allocator; using allocation::Allocator;
using allocation::AllocationPtr; using allocation::AllocationPtr;
extern std::shared_ptr<Allocation> AllocShared( extern std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
const platform::Place& place, size_t size, size_t size);
Allocator::Attr attr = Allocator::kDefault);
extern AllocationPtr Alloc(const platform::Place& place, size_t size, extern AllocationPtr Alloc(const platform::Place& place, size_t size);
Allocator::Attr attr = Allocator::kDefault);
} // namespace memory } // namespace memory
} // namespace paddle } // namespace paddle
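For callers outside the allocation library, the simplified public entry points now take only a place and a size. A small usage sketch drawn from the updated call sites elsewhere in this diff (the header path and the Example function are assumptions for illustration):

#include <cstddef>
#include "paddle/fluid/memory/malloc.h"  // assumed header exposing Alloc/AllocShared

void Example(const paddle::platform::Place& place, size_t nbytes) {
  // Unique allocation; released when `buf` goes out of scope.
  auto buf = paddle::memory::Alloc(place, nbytes);
  void* data = buf->ptr();
  // Shared (reference-counted) allocation, as used by Tensor::mutable_data.
  auto holder = paddle::memory::AllocShared(place, nbytes);
  (void)data;
  (void)holder;
}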
...@@ -136,7 +136,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> { ...@@ -136,7 +136,7 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
} }
// ------------------- cudnn conv algorithm --------------------- // ------------------- cudnn conv algorithm ---------------------
cudnnConvolutionFwdAlgo_t algo; cudnnConvolutionFwdAlgo_t algo{};
bool half_float = false; bool half_float = false;
#if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1) #if CUDA_VERSION >= 9000 && CUDNN_VERSION_MIN(7, 0, 1)
...@@ -361,8 +361,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> { ...@@ -361,8 +361,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel<T> {
int group_offset_out = o_c / groups * o_h * o_w * o_d; int group_offset_out = o_c / groups * o_h * o_w * o_d;
int group_offset_filter = filter->numel() / groups; int group_offset_filter = filter->numel() / groups;
// ------------------- cudnn backward algorithm --------------------- // ------------------- cudnn backward algorithm ---------------------
cudnnConvolutionBwdDataAlgo_t data_algo; cudnnConvolutionBwdDataAlgo_t data_algo{};
cudnnConvolutionBwdFilterAlgo_t filter_algo; cudnnConvolutionBwdFilterAlgo_t filter_algo{};
size_t workspace_size_in_bytes = 0, tmp_size = 0; size_t workspace_size_in_bytes = 0, tmp_size = 0;
size_t workspace_size_limit = 0; size_t workspace_size_limit = 0;
if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) { if (FLAGS_conv_workspace_size_limit > 0 || user_workspace_size > 0) {
......
...@@ -73,8 +73,8 @@ class CVMOpKernel : public framework::OpKernel<T> { ...@@ -73,8 +73,8 @@ class CVMOpKernel : public framework::OpKernel<T> {
} }
} else { } else {
auto lod = x->lod()[0]; auto lod = x->lod()[0];
for (int i = 0; i < lod.size() - 1; ++i) { for (size_t i = 0; i < lod.size() - 1; ++i) {
for (int j = 0; j < lod[i + 1] - lod[i]; ++j) { for (size_t j = 0; j < lod[i + 1] - lod[i]; ++j) {
CvmComputeKernel(use_cvm, item_size, &x_data, &y_data); CvmComputeKernel(use_cvm, item_size, &x_data, &y_data);
} }
} }
...@@ -113,7 +113,7 @@ class CVMGradOpKernel : public framework::OpKernel<T> { ...@@ -113,7 +113,7 @@ class CVMGradOpKernel : public framework::OpKernel<T> {
auto lod = dx->lod()[0]; auto lod = dx->lod()[0];
int seq_num = static_cast<int>(lod.size()) - 1; int seq_num = static_cast<int>(lod.size()) - 1;
for (int i = 0; i < seq_num; ++i) { for (int i = 0; i < seq_num; ++i) {
for (int j = 0; j < lod[i + 1] - lod[i]; ++j) { for (size_t j = 0; j < lod[i + 1] - lod[i]; ++j) {
CvmGradComputeKernel(use_cvm, item_size, *cvm_data, &dout_data, CvmGradComputeKernel(use_cvm, item_size, *cvm_data, &dout_data,
&dx_data); &dx_data);
} }
......
...@@ -127,8 +127,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> { ...@@ -127,8 +127,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
nullptr, temp_storage_bytes, concat_scores.data<T>(), keys_out, idx_in, nullptr, temp_storage_bytes, concat_scores.data<T>(), keys_out, idx_in,
idx_out, total_roi_num); idx_out, total_roi_num);
// Allocate temporary storage // Allocate temporary storage
auto d_temp_storage = memory::Alloc(place, temp_storage_bytes, auto d_temp_storage = memory::Alloc(place, temp_storage_bytes);
memory::Allocator::kScratchpad);
// Run sorting operation // Run sorting operation
// sort score to get corresponding index // sort score to get corresponding index
...@@ -160,8 +159,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> { ...@@ -160,8 +159,7 @@ class GPUCollectFpnProposalsOpKernel : public framework::OpKernel<T> {
nullptr, temp_storage_bytes, sorted_batch_id.data<int>(), out_id_data, nullptr, temp_storage_bytes, sorted_batch_id.data<int>(), out_id_data,
batch_idx_in, index_out_t.data<int>(), real_post_num); batch_idx_in, index_out_t.data<int>(), real_post_num);
// Allocate temporary storage // Allocate temporary storage
d_temp_storage = memory::Alloc(place, temp_storage_bytes, d_temp_storage = memory::Alloc(place, temp_storage_bytes);
memory::Allocator::kScratchpad);
// Run sorting operation // Run sorting operation
// sort batch_id to get corresponding index // sort batch_id to get corresponding index
......
...@@ -140,8 +140,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> { ...@@ -140,8 +140,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
target_lvls_data, keys_out, target_lvls_data, keys_out,
idx_in, idx_out, roi_num); idx_in, idx_out, roi_num);
// Allocate temporary storage // Allocate temporary storage
auto d_temp_storage = memory::Alloc(place, temp_storage_bytes, auto d_temp_storage = memory::Alloc(place, temp_storage_bytes);
memory::Allocator::kScratchpad);
// Run sorting operation // Run sorting operation
// sort target level to get corresponding index // sort target level to get corresponding index
......
...@@ -70,8 +70,7 @@ static void SortDescending(const platform::CUDADeviceContext &ctx, ...@@ -70,8 +70,7 @@ static void SortDescending(const platform::CUDADeviceContext &ctx,
nullptr, temp_storage_bytes, keys_in, keys_out, idx_in, idx_out, num); nullptr, temp_storage_bytes, keys_in, keys_out, idx_in, idx_out, num);
// Allocate temporary storage // Allocate temporary storage
auto place = boost::get<platform::CUDAPlace>(ctx.GetPlace()); auto place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
auto d_temp_storage = auto d_temp_storage = memory::Alloc(place, temp_storage_bytes);
memory::Alloc(place, temp_storage_bytes, memory::Allocator::kScratchpad);
// Run sorting operation // Run sorting operation
cub::DeviceRadixSort::SortPairsDescending<T, int>( cub::DeviceRadixSort::SortPairsDescending<T, int>(
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
#include <nccl.h> #include <nccl.h>
#endif #endif
#include <memory>
#include <thread> // NOLINT #include <thread> // NOLINT
#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type.h"
...@@ -39,8 +40,7 @@ static TensorPayload GetCommunicationAllocationFromTensor( ...@@ -39,8 +40,7 @@ static TensorPayload GetCommunicationAllocationFromTensor(
reinterpret_cast<const platform::CUDADeviceContext&>(ctx); reinterpret_cast<const platform::CUDADeviceContext&>(ctx);
auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type()); auto copy_size = tensor.numel() * framework::SizeOfType(tensor.type());
platform::CUDAPinnedPlace cuda_pinned; platform::CUDAPinnedPlace cuda_pinned;
auto result = memory::AllocShared( auto result = memory::AllocShared(cuda_pinned, copy_size);
cuda_pinned, copy_size, memory::allocation::Allocator::kCrossDevice);
memory::Copy(cuda_pinned, result->ptr(), memory::Copy(cuda_pinned, result->ptr(),
boost::get<platform::CUDAPlace>(tensor.place()), boost::get<platform::CUDAPlace>(tensor.place()),
......
...@@ -234,9 +234,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -234,9 +234,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
"same dimension sizes"); "same dimension sizes");
if (residual_param->format() != handler.GetDstFormat()) { if (residual_param->format() != handler.GetDstFormat()) {
auto output_data = output->mutable_data<T>( auto output_data =
ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, output->mutable_data<T>(ctx.GetPlace(), handler.GetDstMemorySize());
handler.GetDstMemorySize());
auto residual_data_tz = auto residual_data_tz =
paddle::framework::vectorize2int(residual_param->dims()); paddle::framework::vectorize2int(residual_param->dims());
auto residual_data_type = auto residual_data_type =
...@@ -256,9 +255,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -256,9 +255,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data)); handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
} }
} else { } else {
auto output_data = output->mutable_data<T>( auto output_data =
ctx.GetPlace(), paddle::memory::Allocator::kDefault, output->mutable_data<T>(ctx.GetPlace(), handler.GetDstMemorySize());
handler.GetDstMemorySize());
dst_memory_p = dst_memory_p =
handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data)); handler.AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
} }
...@@ -893,8 +891,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -893,8 +891,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
user_diff_dst_memory_p, pipeline); user_diff_dst_memory_p, pipeline);
const size_t size = handler.GetDiffWeightsMemorySize(); const size_t size = handler.GetDiffWeightsMemorySize();
filter_grad_data = filter_grad->mutable_data<T>( filter_grad_data = filter_grad->mutable_data<T>(ctx.GetPlace(), size);
ctx.GetPlace(), paddle::memory::Allocator::kDefault, size);
auto diff_weights_memory_p = auto diff_weights_memory_p =
handler.AcquireDiffWeightsMemoryFromWeightsPrimitive( handler.AcquireDiffWeightsMemoryFromWeightsPrimitive(
...@@ -919,8 +916,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> { ...@@ -919,8 +916,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
pipeline); pipeline);
const size_t size = handler.GetDiffSourceMemorySize(); const size_t size = handler.GetDiffSourceMemorySize();
input_grad_data = input_grad->mutable_data<T>( input_grad_data = input_grad->mutable_data<T>(ctx.GetPlace(), size);
ctx.GetPlace(), paddle::memory::Allocator::kDefault, size);
auto diff_src_memory_p = handler.AcquireDiffSrcMemoryFromDataPrimitive( auto diff_src_memory_p = handler.AcquireDiffSrcMemoryFromDataPrimitive(
reinterpret_cast<void*>(input_grad_data)); reinterpret_cast<void*>(input_grad_data));
......
...@@ -188,9 +188,8 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -188,9 +188,8 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
std::shared_ptr<mkldnn::memory> dst_memory_p; std::shared_ptr<mkldnn::memory> dst_memory_p;
auto output_data = output->mutable_data<T>( auto output_data =
ctx.GetPlace(), paddle::memory::Allocator::kDefault, output->mutable_data<T>(ctx.GetPlace(), handler.GetDstMemorySize());
handler.GetDstMemorySize());
dst_memory_p = handler.AcquireDstMemoryFromPrimitive( dst_memory_p = handler.AcquireDstMemoryFromPrimitive(
platform::to_void_cast<T>(output_data)); platform::to_void_cast<T>(output_data));
......
...@@ -198,8 +198,7 @@ class FCPrimitiveFactory { ...@@ -198,8 +198,7 @@ class FCPrimitiveFactory {
const ExecutionContext& ctx, Tensor* output) { const ExecutionContext& ctx, Tensor* output) {
auto dst_prim_desc = fc_prim_desc.dst_primitive_desc(); auto dst_prim_desc = fc_prim_desc.dst_primitive_desc();
auto buffer_size = dst_prim_desc.get_size(); auto buffer_size = dst_prim_desc.get_size();
T* output_data = output->mutable_data<T>( T* output_data = output->mutable_data<T>(ctx.GetPlace(), buffer_size);
ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, buffer_size);
output->set_format((memory::format)dst_prim_desc.desc().data.format); output->set_format((memory::format)dst_prim_desc.desc().data.format);
return memory(dst_prim_desc, to_void_cast<T>(output_data)); return memory(dst_prim_desc, to_void_cast<T>(output_data));
} }
......
...@@ -167,8 +167,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface { ...@@ -167,8 +167,7 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
if (UNLIKELY(num_bytes == 0)) { if (UNLIKELY(num_bytes == 0)) {
return nullptr; return nullptr;
} }
auto buf = paddle::memory::Alloc(place_, num_bytes, auto buf = paddle::memory::Alloc(place_, num_bytes);
memory::Allocator::kScratchpad);
void* retv = buf->ptr(); void* retv = buf->ptr();
{ {
std::lock_guard<std::mutex> lock(mtx_); std::lock_guard<std::mutex> lock(mtx_);
...@@ -232,8 +231,7 @@ void CudnnHolder::ReallocateWorkspace(size_t required_workspace_len) { ...@@ -232,8 +231,7 @@ void CudnnHolder::ReallocateWorkspace(size_t required_workspace_len) {
PADDLE_ENFORCE(cudaStreamSynchronize(*stream_)); PADDLE_ENFORCE(cudaStreamSynchronize(*stream_));
workspace_.reset(); workspace_.reset();
} }
workspace_ = paddle::memory::Alloc(place_, required_workspace_len, workspace_ = paddle::memory::Alloc(place_, required_workspace_len);
paddle::memory::Allocator::kScratchpad);
} }
CUDADeviceContext::CUDADeviceContext(CUDAPlace place) CUDADeviceContext::CUDADeviceContext(CUDAPlace place)
......
...@@ -325,8 +325,7 @@ class TransposeMKLDNNHandler : public MKLDNNHandler { ...@@ -325,8 +325,7 @@ class TransposeMKLDNNHandler : public MKLDNNHandler {
auto dst_mdp = mkldnn::memory::primitive_desc{ auto dst_mdp = mkldnn::memory::primitive_desc{
Axis2MemoryDesc(dims_, axis_), engine_}; Axis2MemoryDesc(dims_, axis_), engine_};
auto dst_data = output->mutable_data<float>( auto dst_data = output->mutable_data<float>(place, dst_mdp.get_size());
place, paddle::memory::Allocator::kDefault, dst_mdp.get_size());
mem_p = std::make_shared<mkldnn::memory>(dst_mdp, dst_data); mem_p = std::make_shared<mkldnn::memory>(dst_mdp, dst_data);
dev_ctx_.SetBlob(local_key, mem_p); dev_ctx_.SetBlob(local_key, mem_p);
...@@ -865,9 +864,8 @@ template <typename T> ...@@ -865,9 +864,8 @@ template <typename T>
static std::shared_ptr<mkldnn::memory> SetDstMemory( static std::shared_ptr<mkldnn::memory> SetDstMemory(
const framework::ExecutionContext& ctx, framework::Tensor* output, const framework::ExecutionContext& ctx, framework::Tensor* output,
const std::shared_ptr<ConvMKLDNNHandler>& handler) { const std::shared_ptr<ConvMKLDNNHandler>& handler) {
T* output_data = output->mutable_data<T>( T* output_data =
ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, output->mutable_data<T>(ctx.GetPlace(), handler->GetDstMemorySize());
handler->GetDstMemorySize());
std::shared_ptr<mkldnn::memory> dst_memory_p = std::shared_ptr<mkldnn::memory> dst_memory_p =
handler->AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data)); handler->AcquireDstMemoryFromPrimitive(to_void_cast<T>(output_data));
return dst_memory_p; return dst_memory_p;
...@@ -898,9 +896,8 @@ static void SetDstMemoryHandler( ...@@ -898,9 +896,8 @@ static void SetDstMemoryHandler(
const framework::ExecutionContext& ctx, framework::Tensor* output, const framework::ExecutionContext& ctx, framework::Tensor* output,
const std::shared_ptr<ConvMKLDNNHandler>& handler, const std::shared_ptr<ConvMKLDNNHandler>& handler,
std::shared_ptr<mkldnn::memory>* dst_memory_p) { std::shared_ptr<mkldnn::memory>* dst_memory_p) {
T* output_data = output->mutable_data<T>( T* output_data =
ctx.GetPlace(), ::paddle::memory::Allocator::kDefault, output->mutable_data<T>(ctx.GetPlace(), handler->GetDstMemorySize());
handler->GetDstMemorySize());
(*dst_memory_p)->set_data_handle(to_void_cast<T>(output_data)); (*dst_memory_p)->set_data_handle(to_void_cast<T>(output_data));
} }
......
...@@ -90,8 +90,7 @@ void TemporaryAllocator::SetCallback(const std::function<void()> &callback) { ...@@ -90,8 +90,7 @@ void TemporaryAllocator::SetCallback(const std::function<void()> &callback) {
callback_ = callback; callback_ = callback;
} }
alloc::Allocation *TemporaryAllocator::AllocateImpl( alloc::Allocation *TemporaryAllocator::AllocateImpl(size_t size) {
size_t size, alloc::Allocator::Attr attr) {
{ {
// Find available allocation in temp_mem_map. // Find available allocation in temp_mem_map.
std::unique_lock<std::mutex> lock(mtx_); std::unique_lock<std::mutex> lock(mtx_);
...@@ -113,7 +112,7 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl( ...@@ -113,7 +112,7 @@ alloc::Allocation *TemporaryAllocator::AllocateImpl(
} }
// If not find the the available allocation, get allocation from // If not find the the available allocation, get allocation from
// AllocatorFacadeInstance. // AllocatorFacadeInstance.
auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size, attr); auto temp_mem = alloc::AllocatorFacade::Instance().Alloc(place_, size);
VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size; VLOG(10) << "Alloc temporary allocation: " << temp_mem->ptr() << ": " << size;
return temp_mem.release(); return temp_mem.release();
} }
......
...@@ -51,8 +51,7 @@ class TemporaryAllocator : public memory::allocation::Allocator { ...@@ -51,8 +51,7 @@ class TemporaryAllocator : public memory::allocation::Allocator {
protected: protected:
void FreeImpl(memory::allocation::Allocation *allocation) override; void FreeImpl(memory::allocation::Allocation *allocation) override;
memory::allocation::Allocation *AllocateImpl( memory::allocation::Allocation *AllocateImpl(size_t size) override;
size_t size, memory::allocation::Allocator::Attr attr) override;
private: private:
platform::Place place_; platform::Place place_;
......