malloc.cc 6.3 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

G
gongweibao 已提交
15 16
#include "paddle/fluid/memory/malloc.h"

#include <cstring>
#include <memory>
#include <mutex>
#include <vector>

#include "glog/logging.h"

#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
L
liaogang 已提交
24

25 26 27 28 29 30
// Debugging switch read by the Alloc specializations in this file: when it is
// true, freshly allocated memory is filled with the byte 0xEF before the
// pointer is returned to the caller. Defaults to false.
DEFINE_bool(init_allocated_mem, false,
            "It is a mistake that the values of the memory allocated by "
            "BuddyAllocator are always zeroed in some op's implementation. "
            "To find this error in time, we use init_allocated_mem to indicate "
            "that initializing the allocated memory with a small value "
            "during unit testing.");
L
liaogang 已提交
31
// Declares a double-valued flag (presumably defined in another translation
// unit -- confirm). In this file it is only read to build the informational
// log message emitted by GetGPUBuddyAllocator().
DECLARE_double(fraction_of_gpu_memory_to_use);
L
liaogang 已提交
32

33 34 35
namespace paddle {
namespace memory {

36 37 38
// Shorthand so the rest of this file can name detail::BuddyAllocator without
// the namespace qualifier.
using BuddyAllocator = detail::BuddyAllocator;

// Returns the buddy allocator that serves CPU memory requests in this file.
//
// The allocator is constructed on the first call, wrapping a
// detail::CPUAllocator with the CPU min/max chunk sizes reported by
// platform. Every later call returns the same pointer. The object is never
// deleted.
BuddyAllocator* GetCPUBuddyAllocator() {
  // The initializer of a function-local static runs exactly once, even if
  // several threads make the first call concurrently.
  static BuddyAllocator* const cpu_allocator = []() -> BuddyAllocator* {
    return new BuddyAllocator(
        std::unique_ptr<detail::SystemAllocator>(new detail::CPUAllocator),
        platform::CpuMinChunkSize(), platform::CpuMaxChunkSize());
  }();
  return cpu_allocator;
}

L
liaogang 已提交
51
template <>
C
chengduoZH 已提交
52
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
53
  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
54
  void* p = GetCPUBuddyAllocator()->Alloc(size);
55 56 57
  if (FLAGS_init_allocated_mem) {
    memset(p, 0xEF, size);
  }
58
  VLOG(10) << "  pointer=" << p;
59
  return p;
L
liaogang 已提交
60 61 62
}

template <>
C
chengduoZH 已提交
63
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
64
  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
L
liaogang 已提交
65 66 67 68 69 70 71 72
  GetCPUBuddyAllocator()->Free(p);
}

// Returns the value reported by the CPU buddy allocator's Used().
// `place` is not inspected.
template <>
size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
  BuddyAllocator* const cpu_allocator = GetCPUBuddyAllocator();
  return cpu_allocator->Used();
}

73
#ifdef PADDLE_WITH_CUDA
L
liaogang 已提交
74

75
// Returns the buddy allocator that serves GPU `gpu_id`.
//
// On the first call, one BuddyAllocator is created for every device reported
// by platform::GetCUDADeviceCount(); the table is kept for the rest of the
// process and never freed. While building the table the current device is
// switched to each device in turn.
//
// gpu_id: index of the device; must satisfy 0 <= gpu_id < device count. The
//         range is enforced on every call (not only the first one), so an
//         invalid id can never index outside the table.
// Side effect: the current device is set to `gpu_id` before returning.
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
  static std::once_flag init_flag;
  static detail::BuddyAllocator** a_arr = nullptr;
  static int gpu_num = 0;

  std::call_once(init_flag, []() {
    const int device_count = platform::GetCUDADeviceCount();

    // Build the table locally and publish it only once it is complete, so the
    // statics never point at a half-initialized array.
    BuddyAllocator** allocators = new BuddyAllocator*[device_count];
    for (int i = 0; i < device_count; i++) {
      platform::SetDeviceId(i);
      allocators[i] = new BuddyAllocator(
          std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
          platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());

      VLOG(10) << "\n\nNOTE: each GPU device use "
               << FLAGS_fraction_of_gpu_memory_to_use * 100
               << "% of GPU memory.\n"
               << "You can set GFlags environment variable '"
               << "FLAGS_fraction_of_gpu_memory_to_use"
               << "' to change the fraction of GPU usage.\n\n";
    }

    a_arr = allocators;
    gpu_num = device_count;
  });

  // Checked on every call: the table lookup below must stay in bounds even
  // when a later caller passes a negative or too-large id.
  PADDLE_ENFORCE(gpu_id >= 0 && gpu_id < gpu_num,
                 "gpu_id:%d should be >= 0 and < gpu_num:%d", gpu_id, gpu_num);

  platform::SetDeviceId(gpu_id);
  return a_arr[gpu_id];
}

L
liaogang 已提交
105
template <>
D
dzhwinter 已提交
106
size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
107
  return GetGPUBuddyAllocator(place.device)->Used();
L
liaogang 已提交
108
}
L
liaogang 已提交
109

L
liaogang 已提交
110
template <>
C
chengduoZH 已提交
111 112
void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
  auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
C
chengduoZH 已提交
113
  auto* ptr = buddy_allocator->Alloc(size);
114 115 116 117
  if (ptr == nullptr) {
    int cur_dev = platform::GetCurrentDeviceId();
    platform::SetDeviceId(place.device);
    size_t avail, total;
118
    platform::GpuMemoryUsage(&avail, &total);
119 120 121 122 123
    LOG(WARNING) << "Cannot allocate " << size << " bytes in GPU "
                 << place.device << ", available " << avail << " bytes";
    LOG(WARNING) << "total " << total;
    LOG(WARNING) << "GpuMinChunkSize " << platform::GpuMinChunkSize();
    LOG(WARNING) << "GpuMaxChunkSize " << platform::GpuMaxChunkSize();
D
dzhwinter 已提交
124
    LOG(WARNING) << "GPU memory used: " << Used<platform::CUDAPlace>(place);
125 126
    platform::SetDeviceId(cur_dev);
  }
127 128 129
  if (FLAGS_init_allocated_mem) {
    cudaMemset(ptr, 0xEF, size);
  }
130
  return ptr;
131 132
}

L
liaogang 已提交
133
template <>
C
chengduoZH 已提交
134 135 136 137
void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) {
  GetGPUBuddyAllocator(place.device)->Free(p);
}

C
chengduoZH 已提交
138
// Returns the buddy allocator used for CUDAPinnedPlace requests in this file.
//
// The allocator is constructed on the first call, wrapping a
// detail::CUDAPinnedAllocator with the pinned min/max chunk sizes reported by
// platform. Every later call returns the same pointer. The object is never
// deleted.
BuddyAllocator* GetCUDAPinnedBuddyAllocator() {
  // The initializer of a function-local static runs exactly once, even if
  // several threads make the first call concurrently.
  static BuddyAllocator* const pinned_allocator = []() -> BuddyAllocator* {
    return new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
                                  new detail::CUDAPinnedAllocator),
                              platform::CUDAPinnedMinChunkSize(),
                              platform::CUDAPinnedMaxChunkSize());
  }();
  return pinned_allocator;
}

template <>
C
chengduoZH 已提交
153
size_t Used<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place) {
C
chengduoZH 已提交
154
  return GetCUDAPinnedBuddyAllocator()->Used();
C
chengduoZH 已提交
155 156 157 158 159
}

// Allocates `size` bytes from the CUDA-pinned buddy allocator.
//
// place: CUDAPinnedPlace the request is made for; not inspected.
// size:  number of bytes requested.
// Returns the pointer produced by the buddy allocator, or nullptr when the
// allocation failed (a warning is logged in that case).
//
// When FLAGS_init_allocated_mem is set, successfully allocated memory is
// filled with the byte 0xEF.
template <>
void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place,
                                       size_t size) {
  auto* buddy_allocator = GetCUDAPinnedBuddyAllocator();
  void* ptr = buddy_allocator->Alloc(size);

  if (ptr == nullptr) {
    LOG(WARNING) << "cudaMallocHost Cannot allocate " << size
                 << " bytes in CUDAPinnedPlace";
    // Nothing was allocated; return before memset would dereference null.
    return ptr;
  }
  if (FLAGS_init_allocated_mem) {
    std::memset(ptr, 0xEF, size);
  }
  return ptr;
}

// Hands `p` back to the CUDA-pinned buddy allocator. `place` is not
// inspected.
template <>
void Free<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place, void* p) {
  BuddyAllocator* const pinned_allocator = GetCUDAPinnedBuddyAllocator();
  pinned_allocator->Free(p);
}
L
Luo Tao 已提交
177
#endif
178

179 180 181 182 183 184 185 186 187 188 189 190
// Visitor overload for CPU places: forwards to Used() for CPUPlace.
size_t Usage::operator()(const platform::CPUPlace& cpu) const {
  const size_t used_bytes = Used<platform::CPUPlace>(cpu);
  return used_bytes;
}

// Visitor overload for CUDA places: forwards to Used() for CUDAPlace when the
// build defines PADDLE_WITH_CUDA, and raises via PADDLE_THROW otherwise.
size_t Usage::operator()(const platform::CUDAPlace& gpu) const {
#ifndef PADDLE_WITH_CUDA
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#else
  return Used<platform::CUDAPlace>(gpu);
#endif
}

C
chengduoZH 已提交
191
// Visitor overload for CUDA-pinned places: forwards to Used() for
// CUDAPinnedPlace when the build defines PADDLE_WITH_CUDA, and raises via
// PADDLE_THROW otherwise.
size_t Usage::operator()(const platform::CUDAPinnedPlace& cuda_pinned) const {
#ifndef PADDLE_WITH_CUDA
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#else
  return Used<platform::CUDAPinnedPlace>(cuda_pinned);
#endif
}

199 200 201 202
// Returns the usage figure for place `p` by dispatching a Usage visitor on
// the concrete place type held by `p`.
size_t memory_usage(const platform::Place& p) {
  Usage usage_visitor;
  return boost::apply_visitor(usage_visitor, p);
}

203 204
}  // namespace memory
}  // namespace paddle