add unittest for allocator_facade.cc

21fdf8e8 · sneaxiy · 64d94596 · 21fdf8e8 · 21fdf8e8 · 21fdf8e8
8 changed file
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@@ -168,7 +168,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
    startup_exe = fluid.Executor(place)
    startup_exe.run(startup_prog)
    strategy = fluid.ExecutionStrategy()
-    strategy.num_threads = args.cpus
+    strategy.num_threads = 0  #args.cpus
    strategy.allow_op_delay = False
    build_strategy = fluid.BuildStrategy()
    if args.reduce_strategy == "reduce":
@@ -187,6 +187,8 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
        num_trainers = 1
        trainer_id = 0

+    print('Use parallel_executor')
+    strategy.type = 2
    exe = fluid.ParallelExecutor(
        True,
        avg_loss.name,

--- a/benchmark/fluid/models/resnet.py
+++ b/benchmark/fluid/models/resnet.py
@@ -172,7 +172,7 @@ def get_model(args, is_train, main_prog, startup_prog):
    reader, dshape, class_dim = _model_reader_dshape_classdim(args, is_train)

    pyreader = None
-    trainer_count = int(os.getenv("PADDLE_TRAINERS"))
+    trainer_count = int(os.getenv("PADDLE_TRAINERS", 1))
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            if args.use_reader_op:

--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -48,8 +48,11 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
        auto_increment_allocator
        zero_size_allocator
        conditional_allocator
+        retry_allocator
        cuda_device_guard)

 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)

 cc_test(retry_allocator_test SRCS retry_allocator_test.cc DEPS retry_allocator naive_managed_allocator best_fit_allocator locked_allocator cpu_allocator)
+
+cc_test(allocator_facade_test SRCS allocator_facade_test.cc DEPS allocator_facade)
--- a/paddle/fluid/memory/allocation/aligned_allocator.cc
+++ b/paddle/fluid/memory/allocation/aligned_allocator.cc
@@ -26,6 +26,11 @@ std::shared_ptr<Allocation> ThinAlignedAllocator::AllocateShared(
    size_t size, Allocator::Attr attr) {
  return std::shared_ptr<Allocation>(Allocate(size, attr).release());
 }
+
+// This wrapper reports whatever the allocator it forwards to reports.
+bool ThinAlignedAllocator::IsAllocThreadSafe() const {
+  const bool underlying_is_thread_safe =
+      underlying_allocator_->IsAllocThreadSafe();
+  return underlying_is_thread_safe;
+}
+
 }  // namespace allocation
 }  // namespace memory
 }  // namespace paddle
--- a/paddle/fluid/memory/allocation/aligned_allocator.h
+++ b/paddle/fluid/memory/allocation/aligned_allocator.h
@@ -77,6 +77,8 @@ class ThinAlignedAllocator : public ManagedAllocator {

  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override;

+  // Forwards to underlying_allocator_; marked `override` so the compiler
+  // checks the signature against the base class, as the other
+  // ManagedAllocator subclasses do.
+  bool IsAllocThreadSafe() const override;
+
 protected:
  std::shared_ptr<ManagedAllocator> underlying_allocator_;
 };

--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -13,7 +13,9 @@
 // limitations under the License.

 #include "paddle/fluid/memory/allocation/allocator.h"
+#include <gflags/gflags.h>
 #include <map>
+#include <unordered_map>
 #include <vector>
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
@@ -24,6 +26,7 @@
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
 #include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
 #include "paddle/fluid/memory/allocation/pinned_allocator.h"
+#include "paddle/fluid/memory/allocation/retry_allocator.h"
 #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #include "paddle/fluid/platform/gpu_info.h"
@@ -32,6 +35,11 @@
 #include "paddle/fluid/memory/allocation/cuda_allocator.h"
 #endif

+// Read by CUDAManagedAllocator::BestFitAllocatorCreator() to decide whether
+// each chunk allocator is wrapped in a RetryAllocator (value > 0) or in a
+// NaiveManagedAllocator (value <= 0).
+DEFINE_int32(
+    gpu_allocator_retry_time, 0,
+    "The retry time (milliseconds) when allocator fails "
+    "to allocate memory. No retry if this value is not greater than 0");
+
 namespace paddle {
 namespace memory {
 namespace allocation {
@@ -60,6 +68,7 @@ class CPUManagedAllocator : public ManagedAllocator {
      return normal_allocator_->AllocateShared(size, attr);
    }
  }
+
  bool IsAllocThreadSafe() const override { return true; }

 private:
@@ -86,8 +95,12 @@ class CUDAManagedAllocator : public ManagedAllocator {
      size_t capacity = available / max_chunk_size_;

      if (capacity == 1) {
+        VLOG(10) << "Create BestFitAllocator with chunk_size "
+                 << max_chunk_size_;
        default_allocator_ = BestFitAllocatorCreator();
      } else {
+        VLOG(10) << "Create AutoIncrementAllocator with chunk_size "
+                 << max_chunk_size_ << " and capacity " << capacity;
        default_allocator_ = std::make_shared<AutoIncrementAllocator>(
            [this] { return std::move(BestFitAllocatorCreator()); }, capacity);
      }
@@ -116,6 +129,7 @@ class CUDAManagedAllocator : public ManagedAllocator {
  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
    return default_allocator_->Allocate(size, attr);
  }
+
  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
    return default_allocator_->AllocateShared(size, attr);
  }
@@ -123,10 +137,20 @@ class CUDAManagedAllocator : public ManagedAllocator {
  std::shared_ptr<ManagedAllocator> BestFitAllocatorCreator() {
+    // Reserves one more chunk of max_chunk_size_ bytes from raw_allocator_,
+    // keeps it in chunks_, and returns a 64-byte-aligned managed allocator
+    // that serves requests out of that chunk.
    chunks_.emplace_back(raw_allocator_->Allocate(max_chunk_size_));
    auto* allocation = chunks_.back().get();
+    // Best-fit allocation over the chunk, wrapped in a LockedAllocator.
+    std::unique_ptr<Allocator> unmanaged_allocator(new LockedAllocator(
+        std::unique_ptr<Allocator>(new BestFitAllocator(allocation))));
+
+    // A non-positive retry time selects the plain (no retry) wrapper.
+    if (FLAGS_gpu_allocator_retry_time <= 0) {
+      VLOG(10) << "Create NaiveManagedAllocator without retry";
      return std::make_shared<AlignedAllocator<64u>>(
-        NaiveManagedAllocator::Create(std::unique_ptr<Allocator>(
-            new LockedAllocator(std::unique_ptr<Allocator>(
-                new BestFitAllocator(allocation))))));
+          NaiveManagedAllocator::Create(std::move(unmanaged_allocator)));
+    } else {
+      VLOG(10) << "Create RetryAllocator with retry_time "
+               << FLAGS_gpu_allocator_retry_time << "ms";
+      // The flag is known to be positive here, so the cast to size_t is safe.
+      return std::make_shared<AlignedAllocator<64u>>(RetryAllocator::Create(
+          std::move(unmanaged_allocator),
+          static_cast<size_t>(FLAGS_gpu_allocator_retry_time)));
+    }
  }

  bool IsAllocThreadSafe() const override { return true; }
@@ -141,7 +165,8 @@ class CUDAManagedAllocator : public ManagedAllocator {

 class AllocatorFacadePrivate {
 public:
-  std::map<platform::Place, std::shared_ptr<ManagedAllocator>> allocators_;
+  std::unordered_map<platform::Place, std::shared_ptr<ManagedAllocator>>
+      allocators_;

  ~AllocatorFacadePrivate() = default;

@@ -184,13 +209,13 @@ AllocatorFacade& AllocatorFacade::Instance() {

 std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
    const platform::Place& place, size_t size, Allocator::Attr attr) {
-  return m_->allocators_[place]->AllocateShared(size, attr);
+  // at() throws std::out_of_range when no allocator is registered for `place`;
+  // operator[] would insert an empty shared_ptr and then dereference it.
+  return m_->allocators_.at(place)->AllocateShared(size, attr);
 }

 std::unique_ptr<Allocation> AllocatorFacade::Alloc(const platform::Place& place,
                                                   size_t size,
                                                   Allocator::Attr attr) {
-  return m_->allocators_[place]->Allocate(size, attr);
+  // at() throws std::out_of_range when no allocator is registered for `place`;
+  // operator[] would insert an empty shared_ptr and then dereference it.
+  return m_->allocators_.at(place)->Allocate(size, attr);
 }

 }  // namespace allocation

--- a/paddle/fluid/memory/allocation/allocator_facade_test.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade_test.cc
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/memory/allocation/allocator_facade.h"
+#include <gflags/gflags.h>
+#include <gtest/gtest.h>
+
+DECLARE_double(fraction_of_gpu_memory_to_use);
+DECLARE_int32(gpu_allocator_retry_time);
+
+namespace paddle {
+namespace memory {
+namespace allocation {
+
+// Smoke test for AllocatorFacade: each place exercised below must hand back a
+// non-null allocation.
+TEST(allocator, allocator) {
+#ifdef PADDLE_WITH_CUDA
+  // NOTE(review): PADDLE_WITH_CUDA is assumed to be the macro that guards the
+  // CUDA allocator in allocator_facade.cc -- confirm.
+  // Both flags must be set before the first call to Instance().
+  FLAGS_fraction_of_gpu_memory_to_use = 0.01;
+  // A positive value makes BestFitAllocatorCreator() build a RetryAllocator.
+  FLAGS_gpu_allocator_retry_time = 500;
+#endif
+
+  auto &instance = AllocatorFacade::Instance();
+
+  {
+    auto cpu_allocation = instance.Alloc(platform::CPUPlace(), 1024);
+    ASSERT_NE(cpu_allocation, nullptr);
+  }
+
+#ifdef PADDLE_WITH_CUDA
+  // Alloc() looks the place up with at(), which throws for a place that has
+  // no registered allocator, so GPU places are only used in CUDA builds.
+  {
+    auto gpu_allocation = instance.Alloc(platform::CUDAPlace(0), 1024);
+    ASSERT_NE(gpu_allocation, nullptr);
+  }
+
+  {
+    // Allocate 2GB gpu memory
+    auto gpu_allocation = instance.Alloc(platform::CUDAPlace(0),
+                                         2 * static_cast<size_t>(1 << 30));
+    ASSERT_NE(gpu_allocation, nullptr);
+  }
+#endif
+}
+
+}  // namespace allocation
+}  // namespace memory
+}  // namespace paddle
--- a/paddle/fluid/platform/place.h
+++ b/paddle/fluid/platform/place.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once

+#include <functional>
 #include <iostream>
 #include <vector>

@@ -130,5 +131,65 @@ typename Visitor::result_type VisitPlace(const Place &place,
  return boost::apply_visitor(PlaceVisitorWrapper<Visitor>(visitor), place);
 }

+// Visitor for boost::apply_visitor on a Place: hashes the concrete place it is
+// handed through the std::hash specialization for that concrete type, so the
+// result agrees with std::hash<Place>.
+struct PlaceHashVisitor : public boost::static_visitor<size_t> {
+  template <typename Place>
+  inline size_t operator()(const Place &place) const {
+    // Dependent expression: std::hash<Place> is only instantiated where this
+    // operator is used, after the specializations have been declared.
+    return std::hash<Place>()(place);
+  }
+};
+
 }  // namespace platform
 }  // namespace paddle
+
+namespace std {
+
+// std::hash specializations that let the place types (and the Place variant)
+// be used as keys of unordered containers.
+
+template <>
+struct hash<::paddle::platform::CPUPlace> {
+  using argument_type = ::paddle::platform::CPUPlace;
+  using result_type = size_t;
+
+  // Every CPUPlace hashes to the same value: the largest size_t.
+  constexpr inline result_type operator()(const argument_type &place) const {
+    return static_cast<result_type>(-1);
+  }
+};
+
+template <>
+struct hash<::paddle::platform::CUDAPlace> {
+  using argument_type = ::paddle::platform::CUDAPlace;
+  using result_type = size_t;
+
+  // A CUDAPlace hashes to its device index.
+  inline result_type operator()(const argument_type &place) const {
+    return static_cast<result_type>(place.device);
+  }
+};
+
+template <>
+struct hash<::paddle::platform::CUDAPinnedPlace> {
+  using argument_type = ::paddle::platform::CUDAPinnedPlace;
+  using result_type = size_t;
+
+  // Every CUDAPinnedPlace hashes to the same value: the largest size_t minus 1.
+  constexpr inline result_type operator()(const argument_type &place) const {
+    return static_cast<result_type>(-2);
+  }
+};
+
+template <>
+struct hash<::paddle::platform::Place> {
+  using argument_type = ::paddle::platform::Place;
+  using result_type = size_t;
+
+  // Hashes the concrete place currently held by `place`.
+  inline result_type operator()(const argument_type &place) const {
+    return boost::apply_visitor(ConcretePlaceHasher(), place);
+  }
+
+ private:
+  // Kept as a member of this specialization: only specializations of existing
+  // templates may be added to namespace std, and an unnamed namespace in a
+  // header would give every translation unit its own copy of the type.
+  struct ConcretePlaceHasher : public boost::static_visitor<size_t> {
+    template <typename ConcretePlace>
+    inline size_t operator()(const ConcretePlace &place) const {
+      return std::hash<ConcretePlace>()(place);
+    }
+  };
+};
+
+}  // namespace std