Commit 21fdf8e8 authored by sneaxiy

add unittest for allocator_facade.cc

Parent 64d94596
@@ -168,7 +168,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
     startup_exe = fluid.Executor(place)
     startup_exe.run(startup_prog)
     strategy = fluid.ExecutionStrategy()
-    strategy.num_threads = args.cpus
+    strategy.num_threads = 0 #args.cpus
     strategy.allow_op_delay = False
     build_strategy = fluid.BuildStrategy()
     if args.reduce_strategy == "reduce":
@@ -187,6 +187,8 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
         num_trainers = 1
         trainer_id = 0
+    print('Use parallel_executor')
+    strategy.type = 2
     exe = fluid.ParallelExecutor(
         True,
         avg_loss.name,
......
@@ -172,7 +172,7 @@ def get_model(args, is_train, main_prog, startup_prog):
     reader, dshape, class_dim = _model_reader_dshape_classdim(args, is_train)
     pyreader = None
-    trainer_count = int(os.getenv("PADDLE_TRAINERS"))
+    trainer_count = int(os.getenv("PADDLE_TRAINERS", 1))
     with fluid.program_guard(main_prog, startup_prog):
         with fluid.unique_name.guard():
             if args.use_reader_op:
......
@@ -48,8 +48,11 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
     auto_increment_allocator
     zero_size_allocator
     conditional_allocator
+    retry_allocator
     cuda_device_guard)
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
 cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator naive_managed_allocator best_fit_allocator locked_allocator cpu_allocator)
+cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
@@ -26,6 +26,11 @@ std::shared_ptr<Allocation> ThinAlignedAllocator::AllocateShared(
     size_t size, Allocator::Attr attr) {
   return std::shared_ptr<Allocation>(Allocate(size, attr).release());
 }
+
+bool ThinAlignedAllocator::IsAllocThreadSafe() const {
+  return underlying_allocator_->IsAllocThreadSafe();
+}
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
@@ -77,6 +77,8 @@ class ThinAlignedAllocator : public ManagedAllocator {
   std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
+
+  bool IsAllocThreadSafe() const;

  protected:
   std::shared_ptr<ManagedAllocator> underlying_allocator_;
 };
......
@@ -13,7 +13,9 @@
 // limitations under the License.

 #include "paddle/fluid/memory/allocation/allocator.h"
+#include <gflags/gflags.h>
 #include <map>
+#include <unordered_map>
 #include <vector>
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
@@ -24,6 +26,7 @@
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
 #include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
 #include "paddle/fluid/memory/allocation/pinned_allocator.h"
+#include "paddle/fluid/memory/allocation/retry_allocator.h"
 #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/gpu_info.h"
@@ -32,6 +35,11 @@
 #include "paddle/fluid/memory/allocation/cuda_allocator.h"
 #endif

+DEFINE_int32(
+    gpu_allocator_retry_time, 0,
+    "The retry time (milliseconds) when allocator fails "
+    "to allocate memory. No retry if this value is not greater than 0");
+
 namespace paddle {
 namespace memory {
 namespace allocation {
@@ -60,6 +68,7 @@ class CPUManagedAllocator : public ManagedAllocator {
       return normal_allocator_->AllocateShared(size, attr);
     }
   }
+
   bool IsAllocThreadSafe() const override { return true; }

  private:
@@ -86,8 +95,12 @@ class CUDAManagedAllocator : public ManagedAllocator {
     size_t capacity = available / max_chunk_size_;

     if (capacity == 1) {
+      VLOG(10) << "Create BestFitAllocator with chunk_size "
+               << max_chunk_size_;
       default_allocator_ = BestFitAllocatorCreator();
     } else {
+      VLOG(10) << "Create AutoIncrementAllocator with chunk_size "
+               << max_chunk_size_ << " and capacity " << capacity;
       default_allocator_ = std::make_shared<AutoIncrementAllocator>(
           [this] { return std::move(BestFitAllocatorCreator()); }, capacity);
     }
@@ -116,6 +129,7 @@ class CUDAManagedAllocator : public ManagedAllocator {
   std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
     return default_allocator_->Allocate(size, attr);
   }
+
   std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
     return default_allocator_->AllocateShared(size, attr);
   }
@@ -123,10 +137,20 @@ class CUDAManagedAllocator : public ManagedAllocator {
   std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
     chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
     auto* allocation = chunks_.back().get();
-    return std::make_shared<AlignedAllocator<64u>>(
-        NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
-            new LockedAllocator(std::unique_ptr<Allocator>(
-                new BestFitAllocator(allocation))))));
+    std::unique_ptr<Allocator> unmanaged_allocator(new LockedAllocator(
+        std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
+
+    if (FLAGS_gpu_allocator_retry_time <= 0) {
+      VLOG(10) << "Create NaiveManagedAllocator without retry";
+      return std::make_shared<AlignedAllocator<64u>>(
+          NaiveManagedAllocator::Create(std::move(unmanaged_allocator)));
+    } else {
+      VLOG(10) << "Create RetryAllocator with retry_time "
+               << FLAGS_gpu_allocator_retry_time << "ms";
+      return std::make_shared<AlignedAllocator<64u>>(RetryAllocator::Create(
+          std::move(unmanaged_allocator),
+          static_cast<size_t>(FLAGS_gpu_allocator_retry_time)));
+    }
   }

   bool IsAllocThreadSafe() const override { return true; }
@@ -141,7 +165,8 @@ class CUDAManagedAllocator : public ManagedAllocator {
 class AllocatorFacadePrivate {
  public:
-  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
+  std::unordered_map<platform::Place, std::shared_ptr<ManagedAllocator>>
+      allocators_;

   ~AllocatorFacadePrivate() = default;
@@ -184,13 +209,13 @@ AllocatorFacade& AllocatorFacade::Instance() {
 std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size, Allocator::Attr attr) {
-  return m_->allocators_[place]->AllocateShared(size, attr);
+  return m_->allocators_.at(place)->AllocateShared(size, attr);
 }

 std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
                                                    size_t size,
                                                    Allocator::Attr attr) {
-  return m_->allocators_[place]->Allocate(size, attr);
+  return m_->allocators_.at(place)->Allocate(size, attr);
 }

 }  // namespace allocation
......
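
Note (illustration, not part of this commit): the new FLAGS_gpu_allocator_retry_time flag decides whether BestFitAllocatorCreator() wraps each chunk allocator in a RetryAllocator (keep retrying for the given number of milliseconds before failing) or in the plain NaiveManagedAllocator. Below is a minimal standalone sketch of that gflags-gated retry pattern; the flag name alloc_retry_time_ms and the RetryAlloc() helper are made up for illustration and are not Paddle APIs.

// Standalone sketch of a gflags-gated retry window, analogous in spirit to
// FLAGS_gpu_allocator_retry_time.  Everything here is illustrative only.
#include <gflags/gflags.h>

#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <thread>

DEFINE_int32(alloc_retry_time_ms, 0,
             "Retry window in milliseconds; <= 0 disables retrying.");

// Try the allocation once; on failure keep retrying until the window expires.
void* RetryAlloc(std::size_t bytes) {
  const auto deadline =
      std::chrono::steady_clock::now() +
      std::chrono::milliseconds(FLAGS_alloc_retry_time_ms);
  for (;;) {
    void* p = std::malloc(bytes);
    if (p != nullptr) return p;
    if (FLAGS_alloc_retry_time_ms <= 0 ||
        std::chrono::steady_clock::now() >= deadline) {
      return nullptr;  // no retry window left, give up
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
  }
}

int main(int argc, char** argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  void* p = RetryAlloc(1 << 20);  // e.g. run with --alloc_retry_time_ms=500
  std::cout << (p != nullptr ? "allocated" : "failed") << std::endl;
  std::free(p);
  return 0;
}
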
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_int32(gpu_allocator_retry_time);
namespace paddle {
namespace memory {
namespace allocation {
TEST(allocator, allocator) {
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
auto &instance = AllocatorFacade::Instance();
{
auto cpu_allocation = instance.Alloc(platform::CPUPlace(), 1024);
ASSERT_NE(cpu_allocation, nullptr);
}
{
auto gpu_allocation = instance.Alloc(platform::CUDAPlace(0), 1024);
ASSERT_NE(gpu_allocation, nullptr);
}
{
// Allocate 2GB gpu memory
auto gpu_allocation = instance.Alloc(platform::CUDAPlace(0),
2 * static_cast<size_t>(1 << 30));
ASSERT_NE(gpu_allocation, nullptr);
}
{}
}
} // namespace allocation
} // namespace memory
} // namespace paddle
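
Note (illustration, not part of this commit): the test above overrides gflags values directly inside the gtest case (FLAGS_fraction_of_gpu_memory_to_use, FLAGS_gpu_allocator_retry_time) before touching the allocator facade. A self-contained sketch of that pattern, with a made-up flag my_retry_time_ms and helper RetryEnabled():

// Standalone sketch of overriding a gflags flag inside a gtest case.
// my_retry_time_ms and RetryEnabled() are illustrative only.
#include <gflags/gflags.h>
#include <gtest/gtest.h>

DEFINE_int32(my_retry_time_ms, 0, "Illustrative retry window in ms.");

// Pretend this is the code under test: its behaviour depends on the flag.
static bool RetryEnabled() { return FLAGS_my_retry_time_ms > 0; }

TEST(flag_override, takes_effect_inside_the_test) {
  FLAGS_my_retry_time_ms = 500;  // mirrors FLAGS_gpu_allocator_retry_time = 500
  EXPECT_TRUE(RetryEnabled());
}

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}
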
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+#include <functional>
 #include <iostream>
 #include <vector>
@@ -130,5 +131,65 @@ typename Visitor::result_type VisitPlace(const Place &place,
   return boost::apply_visitor(PlaceVisitorWrapper<Visitor>(visitor), place);
 }

+struct PlaceHashVisitor : public boost::static_visitor<size_t> {
+  template <typename Place>
+  inline size_t operator()(const Place &place) const {
+    return place.hash();
+  }
+};
+
 }  // namespace platform
 }  // namespace paddle
+
+namespace std {
+
+template <>
+struct hash<::paddle::platform::CPUPlace> {
+  using argument_type = ::paddle::platform::CPUPlace;
+  using result_type = size_t;
+
+  constexpr inline result_type operator()(const argument_type &place) const {
+    return static_cast<result_type>(-1);
+  }
+};
+
+template <>
+struct hash<::paddle::platform::CUDAPlace> {
+  using argument_type = ::paddle::platform::CUDAPlace;
+  using result_type = size_t;
+
+  inline result_type operator()(const argument_type &place) const {
+    return static_cast<result_type>(place.device);
+  }
+};
+
+template <>
+struct hash<::paddle::platform::CUDAPinnedPlace> {
+  using argument_type = ::paddle::platform::CUDAPinnedPlace;
+  using result_type = size_t;
+
+  constexpr inline result_type operator()(const argument_type &place) const {
+    return static_cast<result_type>(-2);
+  }
+};
+
+namespace {  // NOLINT
+struct PlaceHashVisitor : public boost::static_visitor<size_t> {
+  template <typename Place>
+  inline size_t operator()(const Place &place) const {
+    return std::hash<Place>()(place);
+  }
+};
+}  // namespace
+
+template <>
+struct hash<::paddle::platform::Place> {
+  using argument_type = ::paddle::platform::Place;
+  using result_type = size_t;
+
+  inline result_type operator()(const argument_type &place) const {
+    return boost::apply_visitor(PlaceHashVisitor(), place);
+  }
+};
+
+}  // namespace std
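
Note (illustration, not part of this commit): the std::hash specializations above exist so that platform::Place, a boost::variant over the concrete place types, can serve as the key of the new std::unordered_map in AllocatorFacadePrivate. A self-contained analogue with a made-up Device key type:

// Minimal analogue (illustrative only): specialize std::hash for a small
// value type so it can key an unordered_map, mirroring what the diff does
// for paddle::platform::Place.
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

struct Device {
  int id;  // e.g. a GPU ordinal
  bool operator==(const Device &other) const { return id == other.id; }
};

namespace std {
template <>
struct hash<Device> {
  size_t operator()(const Device &d) const noexcept {
    return static_cast<size_t>(d.id);
  }
};
}  // namespace std

int main() {
  // Without the std::hash<Device> specialization this map would not compile.
  std::unordered_map<Device, std::string> allocators;
  allocators[Device{0}] = "allocator for device 0";
  allocators[Device{1}] = "allocator for device 1";
  std::cout << allocators.at(Device{0}) << std::endl;
  return 0;
}
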