malloc.cc 5.9 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15
#include "paddle/fluid/memory/malloc.h"
16

L
liaogang 已提交
17 18
#include "glog/logging.h"

Y
Yi Wang 已提交
19 20 21
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
L
liaogang 已提交
22

23 24 25 26 27 28
// Defines the gflags boolean FLAGS_init_allocated_mem (default: false).
// The Alloc specializations in this file read it and, when it is set, fill
// each buffer they hand out with the byte 0xEF.
DEFINE_bool(init_allocated_mem, false,
            "It is a mistake that the values of the memory allocated by "
            "BuddyAllocator are always zeroed in some op's implementation. "
            "To find this error in time, we use init_allocated_mem to indicate "
            "that initializing the allocated memory with a small value "
            "during unit testing.");
L
liaogang 已提交
29
// Declares the gflags double FLAGS_fraction_of_gpu_memory_to_use, which is
// defined outside this file. Here it is only read to build the log message
// emitted when a GPU buddy allocator is created.
DECLARE_double(fraction_of_gpu_memory_to_use);
L
liaogang 已提交
30

31 32 33
namespace paddle {
namespace memory {

34 35 36
// Shorthand so the rest of this file can name detail::BuddyAllocator
// without the detail:: qualifier.
using BuddyAllocator = detail::BuddyAllocator;

// Returns the single buddy allocator that serves CPU memory requests.
//
// The allocator is built on the first call from a detail::CPUAllocator and the
// platform's CPU min/max chunk sizes. Nothing in this file deletes it.
BuddyAllocator* GetCPUBuddyAllocator() {
  // Initialization of a function-local static is guaranteed to happen exactly
  // once since C++11, so concurrent first calls cannot construct two
  // allocators the way an unsynchronized "if null then new" check could.
  static BuddyAllocator* const a = new BuddyAllocator(
      new detail::CPUAllocator, platform::CpuMinChunkSize(),
      platform::CpuMaxChunkSize());
  return a;
}

L
liaogang 已提交
46
template <>
C
chengduoZH 已提交
47
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
48
  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
49
  void* p = GetCPUBuddyAllocator()->Alloc(size);
50 51 52
  if (FLAGS_init_allocated_mem) {
    memset(p, 0xEF, size);
  }
53
  VLOG(10) << "  pointer=" << p;
54
  return p;
L
liaogang 已提交
55 56 57
}

template <>
C
chengduoZH 已提交
58
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
59
  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
L
liaogang 已提交
60 61 62 63 64 65 66 67
  GetCPUBuddyAllocator()->Free(p);
}

// Reports the value of Used() on the CPU buddy allocator. The `place`
// argument is not consulted.
template <>
size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
  BuddyAllocator* const cpu_allocator = GetCPUBuddyAllocator();
  const size_t used_bytes = cpu_allocator->Used();
  return used_bytes;
}

68
#ifdef PADDLE_WITH_CUDA
L
liaogang 已提交
69

70
// Returns the buddy allocator associated with GPU `gpu_id`, creating it on
// first use.
//
// A table with one slot per device reported by platform::GetCUDADeviceCount()
// is built on the first call; each slot is filled lazily the first time its
// device is requested. Every call also invokes platform::SetDeviceId(gpu_id)
// and does not restore whatever device was selected before.
//
// NOTE(review): `gpu_id` indexes the table without a local range check, and
// the per-device creation below is not synchronized. Presumably
// platform::SetDeviceId rejects invalid ids and callers serialize first use of
// a device -- confirm.
BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
  // Function-local static: constructed exactly once (C++11). The trailing "()"
  // value-initializes the array, so every slot starts out as nullptr.
  static BuddyAllocator** const as =
      new BuddyAllocator*[platform::GetCUDADeviceCount()]();

  platform::SetDeviceId(gpu_id);
  if (as[gpu_id] == nullptr) {
    as[gpu_id] = new BuddyAllocator(new detail::GPUAllocator(gpu_id),
                                    platform::GpuMinChunkSize(),
                                    platform::GpuMaxChunkSize());
    VLOG(10) << "\n\nNOTE: each GPU device use "
             << FLAGS_fraction_of_gpu_memory_to_use * 100
             << "% of GPU memory.\n"
             << "You can set GFlags environment variable '"
             << "FLAGS_fraction_of_gpu_memory_to_use"
             << "' to change the fraction of GPU usage.\n\n";
  }
  return as[gpu_id];
}

L
liaogang 已提交
94
template <>
D
dzhwinter 已提交
95
size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
96
  return GetGPUBuddyAllocator(place.device)->Used();
L
liaogang 已提交
97
}
L
liaogang 已提交
98

L
liaogang 已提交
99
template <>
C
chengduoZH 已提交
100 101
void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
  auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
C
chengduoZH 已提交
102
  auto* ptr = buddy_allocator->Alloc(size);
103 104 105 106
  if (ptr == nullptr) {
    int cur_dev = platform::GetCurrentDeviceId();
    platform::SetDeviceId(place.device);
    size_t avail, total;
107
    platform::GpuMemoryUsage(&avail, &total);
108 109 110 111 112
    LOG(WARNING) << "Cannot allocate " << size << " bytes in GPU "
                 << place.device << ", available " << avail << " bytes";
    LOG(WARNING) << "total " << total;
    LOG(WARNING) << "GpuMinChunkSize " << platform::GpuMinChunkSize();
    LOG(WARNING) << "GpuMaxChunkSize " << platform::GpuMaxChunkSize();
D
dzhwinter 已提交
113
    LOG(WARNING) << "GPU memory used: " << Used<platform::CUDAPlace>(place);
114 115
    platform::SetDeviceId(cur_dev);
  }
116 117 118
  if (FLAGS_init_allocated_mem) {
    cudaMemset(ptr, 0xEF, size);
  }
119
  return ptr;
120 121
}

L
liaogang 已提交
122
template <>
C
chengduoZH 已提交
123 124 125 126
void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) {
  GetGPUBuddyAllocator(place.device)->Free(p);
}

C
chengduoZH 已提交
127 128 129 130
// Returns the single buddy allocator that serves CUDA pinned memory requests.
//
// The allocator is built on the first call from a detail::CUDAPinnedAllocator
// and the platform's CUDA-pinned min/max chunk sizes. Nothing in this file
// deletes it.
BuddyAllocator* GetCUDAPinnedBuddyAllocator() {
  // Initialization of a function-local static is guaranteed to happen exactly
  // once since C++11, so concurrent first calls cannot construct two
  // allocators the way an unsynchronized "if null then new" check could.
  static BuddyAllocator* const ba = new BuddyAllocator(
      new detail::CUDAPinnedAllocator, platform::CUDAPinnedMinChunkSize(),
      platform::CUDAPinnedMaxChunkSize());
  return ba;
}

template <>
C
chengduoZH 已提交
138
size_t Used<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place) {
C
chengduoZH 已提交
139
  return GetCUDAPinnedBuddyAllocator()->Used();
C
chengduoZH 已提交
140 141 142 143 144
}

// Allocates `size` bytes from the CUDA pinned buddy allocator.
//
// On failure (the buddy allocator returns nullptr) a warning is logged and
// nullptr is returned. On success, if FLAGS_init_allocated_mem is set, the
// buffer is filled with the byte 0xEF before being returned.
template <>
void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place,
                                       size_t size) {
  auto* buddy_allocator = GetCUDAPinnedBuddyAllocator();
  void* ptr = buddy_allocator->Alloc(size);

  if (ptr == nullptr) {
    LOG(WARNING) << "cudaMallocHost Cannot allocate " << size
                 << " bytes in CUDAPinnedPlace";
    // Nothing was allocated, so there is nothing to poison: return before the
    // memset below can write through a null pointer.
    return nullptr;
  }
  if (FLAGS_init_allocated_mem) {
    memset(ptr, 0xEF, size);
  }
  return ptr;
}

// Hands the block at `p` back to the CUDA pinned buddy allocator. The `place`
// argument is not consulted.
template <>
void Free<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place, void* p) {
  BuddyAllocator* const pinned_allocator = GetCUDAPinnedBuddyAllocator();
  pinned_allocator->Free(p);
}
L
Luo Tao 已提交
162
#endif
163

164 165 166 167 168 169 170 171 172 173 174 175
// Visitor overload for a CPU place: forwards to Used<platform::CPUPlace>.
size_t Usage::operator()(const platform::CPUPlace& cpu) const {
  return Used<platform::CPUPlace>(cpu);
}

// Visitor overload for a CUDA place. Without PADDLE_WITH_CUDA this always
// raises through PADDLE_THROW; otherwise it forwards to
// Used<platform::CUDAPlace>.
size_t Usage::operator()(const platform::CUDAPlace& gpu) const {
#ifndef PADDLE_WITH_CUDA
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#else
  return Used<platform::CUDAPlace>(gpu);
#endif
}

C
chengduoZH 已提交
176
// Visitor overload for a CUDA pinned place. Without PADDLE_WITH_CUDA this
// always raises through PADDLE_THROW; otherwise it forwards to
// Used<platform::CUDAPinnedPlace>.
size_t Usage::operator()(const platform::CUDAPinnedPlace& cuda_pinned) const {
#ifndef PADDLE_WITH_CUDA
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#else
  return Used<platform::CUDAPinnedPlace>(cuda_pinned);
#endif
}

184 185 186 187
// Returns the result of applying a Usage visitor to the place variant `p`,
// i.e. the Usage::operator() overload matching the place type held in `p`.
size_t memory_usage(const platform::Place& p) {
  const size_t bytes_in_use = boost::apply_visitor(Usage(), p);
  return bytes_in_use;
}

188 189
}  // namespace memory
}  // namespace paddle