// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/allocator.h"
#include <gflags/gflags.h>
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/naive_best_fit_allocator.h"
#include "paddle/fluid/memory/allocation/retry_allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_allocator.h"
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/gpu_info.h"
#endif

DEFINE_int64(
    gpu_allocator_retry_time, 0,
    "The retry time (milliseconds) when allocator fails "
    "to allocate memory. No retry if this value is not greater than 0");

namespace paddle {
namespace memory {
namespace allocation {

class AllocatorFacadePrivate {
 public:
  AllocatorFacadePrivate() {
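    // Build one allocator per place according to the global allocator
    // strategy (naive best-fit or auto-growth).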
    auto strategy = GetAllocatorStrategy();
    switch (strategy) {
      case AllocatorStrategy::kNaiveBestFit: {
        InitNaiveBestFitCPUAllocator();
#ifdef PADDLE_WITH_CUDA
        for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount();
             ++dev_id) {
          InitNaiveBestFitCUDAAllocator(platform::CUDAPlace(dev_id));
        }
        InitNaiveBestFitCUDAPinnedAllocator();
#endif
        break;
      }

      case AllocatorStrategy::kAutoGrowth: {
        InitNaiveBestFitCPUAllocator();
#ifdef PADDLE_WITH_CUDA
        for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount();
             ++dev_id) {
          InitAutoGrowthCUDAAllocator(platform::CUDAPlace(dev_id));
        }
        InitNaiveBestFitCUDAPinnedAllocator();
#endif
        break;
      }

      default: {
        PADDLE_THROW("Unsupported allocator strategy: %d",
                     static_cast<int>(strategy));
      }
    }
    InitZeroSizeAllocators();
  }

  inline const std::shared_ptr<Allocator>& GetAllocator(
      const platform::Place& place, size_t size) {
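    // Zero-byte requests are served by the per-place ZeroSizeAllocator;
    // everything else goes to the strategy-specific allocator for this place.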
    const auto& allocators = (size > 0 ? allocators_ : zero_size_allocators_);
    auto iter = allocators.find(place);
    PADDLE_ENFORCE(iter != allocators.end(),
                   "No such allocator for the place, %s", place);
    return iter->second;
  }

 private:
  void InitNaiveBestFitCPUAllocator() {
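    // The CPU place uses the naive best-fit allocator under both strategies.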
    allocators_[platform::CPUPlace()] =
        std::make_shared<NaiveBestFitAllocator>(platform::CPUPlace());
  }

#ifdef PADDLE_WITH_CUDA
  void InitNaiveBestFitCUDAPinnedAllocator() {
    allocators_[platform::CUDAPinnedPlace()] =
        std::make_shared<NaiveBestFitAllocator>(platform::CUDAPinnedPlace());
  }

  void InitNaiveBestFitCUDAAllocator(platform::CUDAPlace p) {
    allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
  }

  void InitAutoGrowthCUDAAllocator(platform::CUDAPlace p) {
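    // Layer an auto-growth best-fit allocator on top of the raw CUDAAllocator
    // so freed device memory can be reused instead of being returned to CUDA
    // immediately.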
    auto cuda_allocator = std::make_shared<CUDAAllocator>(p);
    allocators_[p] = std::make_shared<AutoGrowthBestFitAllocator>(
        cuda_allocator, platform::GpuMinChunkSize());
  }
#endif

  class ZeroSizeAllocator : public Allocator {
   public:
    explicit ZeroSizeAllocator(platform::Place place) : place_(place) {}

   protected:
    Allocation* AllocateImpl(size_t size) override {
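      // A zero-size request never touches a real allocator; a placeholder
      // Allocation holding a null pointer is sufficient.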
      return new Allocation(nullptr, 0, place_);
    }

    void FreeImpl(Allocation* allocation) override { delete allocation; }

   private:
    platform::Place place_;
  };

  void InitZeroSizeAllocators() {
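    // Register a ZeroSizeAllocator for every place the facade knows about,
    // so GetAllocator() can always resolve zero-byte requests.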
    std::vector<platform::Place> places;
    places.emplace_back(platform::CPUPlace());
#ifdef PADDLE_WITH_CUDA
    int device_count = platform::GetCUDADeviceCount();
    for (int dev_id = 0; dev_id < device_count; ++dev_id) {
      places.emplace_back(platform::CUDAPlace(dev_id));
    }
    places.emplace_back(platform::CUDAPinnedPlace());
#endif

    for (auto& p : places) {
      zero_size_allocators_[p] = std::make_shared<ZeroSizeAllocator>(p);
    }
  }

 private:
  std::map<platform::Place, std::shared_ptr<Allocator>> allocators_;
  std::map<platform::Place, std::shared_ptr<Allocator>> zero_size_allocators_;
};

// Pimpl idiom: keep implementation details out of the public interface.
AllocatorFacade::AllocatorFacade() : m_(new AllocatorFacadePrivate()) {}
// Deleting m_ here may cause a core dump when Python's interpreter teardown
// conflicts with C++ static destruction, so m_ is deliberately not freed.
AllocatorFacade::~AllocatorFacade() {}

AllocatorFacade& AllocatorFacade::Instance() {
  static AllocatorFacade instance;
  return instance;
}

std::shared_ptr<Allocation> AllocatorFacade::AllocShared(
    const platform::Place& place, size_t size) {
  return std::shared_ptr<Allocation>(Alloc(place, size));
}

AllocationPtr AllocatorFacade::Alloc(const platform::Place& place,
                                     size_t size) {
  return m_->GetAllocator(place, size)->Allocate(size);
}
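
// Illustrative call site (a sketch, not part of this file): callers normally
// go through the singleton, e.g.
//   auto allocation =
//       AllocatorFacade::Instance().AllocShared(platform::CUDAPlace(0), 1024);
// The facade hides which concrete allocator ends up serving the request.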

}  // namespace allocation
}  // namespace memory
}  // namespace paddle