Commit 21fdf8e8 authored by S sneaxiy

add unittest for allocator_facade.cc

Parent 64d94596
......@@ -168,7 +168,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
startup_exe = fluid.Executor(place)
startup_exe.run(startup_prog)
strategy = fluid.ExecutionStrategy()
strategy.num_threads = args.cpus
strategy.num_threads = 0 #args.cpus
strategy.allow_op_delay = False
build_strategy = fluid.BuildStrategy()
if args.reduce_strategy == "reduce":
......@@ -187,6 +187,8 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
num_trainers = 1
trainer_id = 0
print('Use parallel_executor')
strategy.type = 2
exe = fluid.ParallelExecutor(
True,
avg_loss.name,
......
......@@ -172,7 +172,7 @@ def get_model(args, is_train, main_prog, startup_prog):
reader, dshape, class_dim = _model_reader_dshape_classdim(args, is_train)
pyreader = None
trainer_count = int(os.getenv("PADDLE_TRAINERS"))
trainer_count = int(os.getenv("PADDLE_TRAINERS", 1))
with fluid.program_guard(main_prog, startup_prog):
with fluid.unique_name.guard():
if args.use_reader_op:
......
......@@ -48,8 +48,11 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
auto_increment_allocator
zero_size_allocator
conditional_allocator
retry_allocator
cuda_device_guard)
nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator naive_managed_allocator best_fit_allocator locked_allocator cpu_allocator)
cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
......@@ -26,6 +26,11 @@ std::shared_ptr<Allocation> ThinAlignedAllocator::AllocateShared(
size_t size, Allocator::Attr attr) {
return std::shared_ptr<Allocation>(Allocate(size, attr).release());
}
bool ThinAlignedAllocator::IsAllocThreadSafe() const {
return underlying_allocator_->IsAllocThreadSafe();
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -77,6 +77,8 @@ class ThinAlignedAllocator : public ManagedAllocator {
std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;
bool IsAllocThreadSafe() const;
protected:
std::shared_ptr<ManagedAllocator> underlying_allocator_;
};
......
......@@ -13,7 +13,9 @@
// limitations under the License.
#include "paddle/fluid/memory/allocation/allocator.h"
#include <gflags/gflags.h>
#include <map>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/memory/allocation/aligned_allocator.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
......@@ -24,6 +26,7 @@
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/memory/allocation/zero_size_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/gpu_info.h"
......@@ -32,6 +35,11 @@
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
#endif
DEFINE_int32(
gpu_allocator_retry_time, 0,
"The retry time (milliseconds) when allocator fails "
"to allocate memory. No retry if this value is not greater than 0");
namespace paddle {
namespace memory {
namespace allocation {
......@@ -60,6 +68,7 @@ class CPUManagedAllocator : public ManagedAllocator {
return normal_allocator_->AllocateShared(size, attr);
}
}
bool IsAllocThreadSafe() const override { return true; }
private:
......@@ -86,8 +95,12 @@ class CUDAManagedAllocator : public ManagedAllocator {
size_t capacity = available / max_chunk_size_;
if (capacity == 1) {
VLOG(10) << "Create BestFitAllocator with chunk_size "
<< max_chunk_size_;
default_allocator_ = BestFitAllocatorCreator();
} else {
VLOG(10) << "Create AutoIncrementAllocator with chunk_size "
<< max_chunk_size_ << " and capacity " << capacity;
default_allocator_ = std::make_shared<AutoIncrementAllocator>(
[this] { return std::move(BestFitAllocatorCreator()); }, capacity);
}
......@@ -116,6 +129,7 @@ class CUDAManagedAllocator : public ManagedAllocator {
std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
return default_allocator_->Allocate(size, attr);
}
std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
return default_allocator_->AllocateShared(size, attr);
}
......@@ -123,10 +137,20 @@ class CUDAManagedAllocator : public ManagedAllocator {
std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
auto* allocation = chunks_.back().get();
return std::make_shared<AlignedAllocator<64u>>(
NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
new LockedAllocator(std::unique_ptr<Allocator>(
new BestFitAllocator(allocation))))));
std::unique_ptr<Allocator> unmanaged_allocator(new LockedAllocator(
std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
if (FLAGS_gpu_allocator_retry_time <= 0) {
VLOG(10) << "Create NaiveManagedAllocator without retry";
return std::make_shared<AlignedAllocator<64u>>(
NaiveManagedAllocator::Create(std::move(unmanaged_allocator)));
} else {
VLOG(10) << "Create RetryAllocator with retry_time "
<< FLAGS_gpu_allocator_retry_time << "ms";
return std::make_shared<AlignedAllocator<64u>>(RetryAllocator::Create(
std::move(unmanaged_allocator),
static_cast<size_t>(FLAGS_gpu_allocator_retry_time)));
}
}
bool IsAllocThreadSafe() const override { return true; }
......@@ -141,7 +165,8 @@ class CUDAManagedAllocator : public ManagedAllocator {
class AllocatorFacadePrivate {
public:
std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
std::unordered_map<platform::Place, std::shared_ptr<ManagedAllocator>>
allocators_;
~AllocatorFacadePrivate() = default;
......@@ -184,13 +209,13 @@ AllocatorFacade& AllocatorFacade::Instance() {
std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size, Allocator::Attr attr) {
return m_->allocators_[place]->AllocateShared(size, attr);
return m_->allocators_.at(place)->AllocateShared(size, attr);
}
std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
size_t size,
Allocator::Attr attr) {
return m_->allocators_[place]->Allocate(size, attr);
return m_->allocators_.at(place)->Allocate(size, attr);
}
} // namespace allocation
......
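A note on the lookup change in AllocatorFacade above: std::unordered_map::operator[] default-constructs a value for a missing key, so querying an unknown platform::Place would silently insert a null allocator and only fail later on dereference, whereas at() throws std::out_of_range immediately. A minimal standalone sketch of the difference (plain std::string keys, not Paddle code):
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
int main() {
  std::unordered_map<std::string, std::shared_ptr<int>> allocators;
  allocators["cpu"] = std::make_shared<int>(1);
  // operator[] silently inserts a default-constructed (null) entry.
  auto a = allocators["gpu"];
  std::cout << (a == nullptr) << std::endl;  // prints 1
  // at() fails loudly instead of inserting.
  try {
    allocators.at("xpu");
  } catch (const std::out_of_range &) {
    std::cout << "unknown place" << std::endl;
  }
  return 0;
}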
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include <gflags/gflags.h>
#include <gtest/gtest.h>
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_int32(gpu_allocator_retry_time);
namespace paddle {
namespace memory {
namespace allocation {
TEST(allocator, allocator) {
FLAGS_fraction_of_gpu_memory_to_use = 0.01;
FLAGS_gpu_allocator_retry_time = 500;
auto &instance = AllocatorFacade::Instance();
{
auto cpu_allocation = instance.Alloc(platform::CPUPlace(), 1024);
ASSERT_NE(cpu_allocation, nullptr);
}
{
auto gpu_allocation = instance.Alloc(platform::CUDAPlace(0), 1024);
ASSERT_NE(gpu_allocation, nullptr);
}
{
// Allocate 2GB gpu memory
auto gpu_allocation = instance.Alloc(platform::CUDAPlace(0),
2 * static_cast<size_t>(1 << 30));
ASSERT_NE(gpu_allocation, nullptr);
}
{}
}
} // namespace allocation
} // namespace memory
} // namespace paddle
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <iostream>
#include <vector>
......@@ -130,5 +131,65 @@ typename Visitor::result_type VisitPlace(const Place &place,
return boost::apply_visitor(PlaceVisitorWrapper<Visitor>(visitor), place);
}
struct PlaceHashVisitor : public boost::static_visitor<size_t> {
template <typename Place>
inline size_t operator()(const Place &place) const {
return place.hash();
}
};
} // namespace platform
} // namespace paddle
namespace std {
template <>
struct hash<::paddle::platform::CPUPlace> {
using argument_type = ::paddle::platform::CPUPlace;
using result_type = size_t;
constexpr inline result_type operator()(const argument_type &place) const {
return static_cast<result_type>(-1);
}
};
template <>
struct hash<::paddle::platform::CUDAPlace> {
using argument_type = ::paddle::platform::CUDAPlace;
using result_type = size_t;
inline result_type operator()(const argument_type &place) const {
return static_cast<result_type>(place.device);
}
};
template <>
struct hash<::paddle::platform::CUDAPinnedPlace> {
using argument_type = ::paddle::platform::CUDAPinnedPlace;
using result_type = size_t;
constexpr inline result_type operator()(const argument_type &place) const {
return static_cast<result_type>(-2);
}
};
namespace { // NOLINT
struct PlaceHashVisitor : public boost::static_visitor<size_t> {
template <typename Place>
inline size_t operator()(const Place &place) const {
return std::hash<Place>()(place);
}
};
}
template <>
struct hash<::paddle::platform::Place> {
using argument_type = ::paddle::platform::Place;
using result_type = size_t;
inline result_type operator()(const argument_type &place) const {
return boost::apply_visitor(PlaceHashVisitor(), place);
}
};
} // namespace std
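These std::hash specializations exist so that platform::Place (a boost::variant over CPUPlace, CUDAPlace and CUDAPinnedPlace) can serve as the key of the std::unordered_map now used for allocators_ in AllocatorFacadePrivate; std::unordered_map requires a std::hash for its key type. A minimal self-contained sketch of the same pattern, with a hypothetical FakePlace type standing in for platform::Place:
#include <cstddef>
#include <functional>
#include <memory>
#include <unordered_map>
struct FakePlace {
  enum class Kind { kCPU, kGPU, kPinned } kind;
  int device;  // only meaningful for kGPU
  bool operator==(const FakePlace &o) const {
    return kind == o.kind && device == o.device;
  }
};
namespace std {
template <>
struct hash<FakePlace> {
  size_t operator()(const FakePlace &p) const {
    // Mix the place kind with the device id, mirroring how the
    // CUDAPlace specialization above hashes on place.device.
    return (static_cast<size_t>(p.kind) << 16) ^ static_cast<size_t>(p.device);
  }
};
}  // namespace std
int main() {
  std::unordered_map<FakePlace, std::shared_ptr<int>> allocators;
  allocators[{FakePlace::Kind::kCPU, 0}] = std::make_shared<int>(0);
  allocators[{FakePlace::Kind::kGPU, 0}] = std::make_shared<int>(1);
  return *allocators.at({FakePlace::Kind::kGPU, 0});
}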