lazy init of allocators, test=develop (#20854)

bb8d7783 · Zeng Jinle · GitHub · aacd16db · bb8d7783
显示空白变更内容
内联并排

Showing 1 changed file with 43 additions and 24 deletions

paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +43 -24

未找到文件。
--- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
+++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc
@@ -101,25 +101,38 @@ size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
 }
 #ifdef PADDLE_WITH_CUDA
-BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
+class GPUBuddyAllocatorList {
-  static std::once_flag init_flag;
+ private:
-  static detail::BuddyAllocator **a_arr = nullptr;
+  GPUBuddyAllocatorList() : devices_(platform::GetSelectedDevices()) {
-  static std::vector<int> devices;
+    auto gpu_num = devices_.size();
+    allocators_.resize(gpu_num);
-  std::call_once(init_flag, [gpu_id]() {
+    init_flags_.reserve(gpu_num);
-    devices = platform::GetSelectedDevices();
+    for (size_t i = 0; i < gpu_num; ++i) {
-    int gpu_num = devices.size();
+      init_flags_.emplace_back(new std::once_flag());
-    a_arr = new BuddyAllocator *[gpu_num];
+    }
+  }
-    for (size_t i = 0; i < devices.size(); ++i) {
-      int dev_id = devices[i];
+  static GPUBuddyAllocatorList *CreateNewInstance() {
-      a_arr[i] = nullptr;
+    return new GPUBuddyAllocatorList();
-      platform::SetDeviceId(dev_id);
+  }
-      a_arr[i] = new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
-                                        new detail::GPUAllocator(dev_id)),
+ public:
-                                    platform::GpuMinChunkSize(),
+  static GPUBuddyAllocatorList *Instance() {
-                                    platform::GpuMaxChunkSize());
+    static auto *instance = CreateNewInstance();
+    return instance;
+  }
+  BuddyAllocator *Get(int gpu_id) {
+    auto pos = std::distance(
+        devices_.begin(), std::find(devices_.begin(), devices_.end(), gpu_id));
+    PADDLE_ENFORCE_LT(pos, devices_.size());
+    std::call_once(*init_flags_[pos], [this, pos] {
+      platform::SetDeviceId(devices_[pos]);
+      allocators_[pos].reset(new BuddyAllocator(
+          std::unique_ptr<detail::SystemAllocator>(
+              new detail::GPUAllocator(devices_[pos])),
+          platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()));
      VLOG(10) << "\n\nNOTE:\n"
               << "You can set GFlags environment variable "
               << "'FLAGS_fraction_of_gpu_memory_to_use' "
@@ -132,13 +145,19 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
               << FLAGS_initial_gpu_memory_in_mb
               << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
               << FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
-    }
-    platform::SetDeviceId(gpu_id);
    });
-  auto pos = std::distance(devices.begin(),
+    return allocators_[pos].get();
-                           std::find(devices.begin(), devices.end(), gpu_id));
+  }
-  return a_arr[pos];
+ private:
+  std::vector<int> devices_;
+  std::vector<std::unique_ptr<std::once_flag>> init_flags_;
+  std::vector<std::unique_ptr<BuddyAllocator>> allocators_;
+};
+BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
+  return GPUBuddyAllocatorList::Instance()->Get(gpu_id);
 }
 #endif