system_allocator.cc 7.3 KB
Newer Older
1
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2 3 4 5 6 7 8 9 10 11 12 13

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
D
dzhwinter 已提交
14
#define GLOG_NO_ABBREVIATED_SEVERITIES
15

Y
Yi Wang 已提交
16
#include "paddle/fluid/memory/detail/system_allocator.h"
17

D
dzhwinter 已提交
18 19 20 21
#ifdef _WIN32
#include <malloc.h>
#include <windows.h>  // VirtualLock/VirtualUnlock
#else
22
#include <sys/mman.h>  // for mlock and munlock
D
dzhwinter 已提交
23 24 25
#endif
#include <stdlib.h>   // for malloc and free
#include <algorithm>  // for std::max
26 27
#include <string>
#include <utility>
28 29

#include "gflags/gflags.h"
30
#include "paddle/fluid/memory/allocation/allocator.h"
Y
Yi Wang 已提交
31 32 33
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
34 35 36
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
37

S
sneaxiy 已提交
38
DECLARE_bool(use_pinned_memory);
39
DECLARE_double(fraction_of_gpu_memory_to_use);
40 41
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);
Z
zhhsplendid 已提交
42

43 44 45 46
namespace paddle {
namespace memory {
namespace detail {

D
dzhwinter 已提交
47
void* AlignedMalloc(size_t size) {
G
gongweibao 已提交
48
  void* p = nullptr;
D
dzhwinter 已提交
49
  size_t alignment = 32ul;
T
tensor-tang 已提交
50
#ifdef PADDLE_WITH_MKLDNN
51 52
  // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
  // memory alignment
D
dzhwinter 已提交
53 54 55 56
  alignment = 4096ul;
#endif
#ifdef _WIN32
  p = _aligned_malloc(size, alignment);
57
#else
D
dzhwinter 已提交
58
  PADDLE_ENFORCE_EQ(posix_memalign(&p, alignment, size), 0, "Alloc %ld error!",
G
gongweibao 已提交
59
                    size);
60
#endif
61
  PADDLE_ENFORCE_NOT_NULL(p, "Fail to allocate CPU memory: size = %d .", size);
D
dzhwinter 已提交
62 63 64 65 66 67 68 69 70 71 72 73
  return p;
}

void* CPUAllocator::Alloc(size_t* index, size_t size) {
  // According to http://www.cplusplus.com/reference/cstdlib/malloc/,
  // malloc might not return nullptr if size is zero, but the returned
  // pointer shall not be dereferenced -- so we make it nullptr.
  if (size <= 0) return nullptr;

  *index = 0;  // unlock memory

  void* p = AlignedMalloc(size);
74 75 76

  if (p != nullptr) {
    if (FLAGS_use_pinned_memory) {
Y
Yi Wang 已提交
77
      *index = 1;
D
dzhwinter 已提交
78 79 80
#ifdef _WIN32
      VirtualLock(p, size);
#else
81
      mlock(p, size);  // lock memory
D
dzhwinter 已提交
82
#endif
83
    }
84
  }
85

86 87 88
  return p;
}

L
liaogang 已提交
89
void CPUAllocator::Free(void* p, size_t size, size_t index) {
90
  if (p != nullptr && index == 1) {
D
dzhwinter 已提交
91 92 93
#ifdef _WIN32
    VirtualUnlock(p, size);
#else
94
    munlock(p, size);
D
dzhwinter 已提交
95
#endif
96
  }
P
peizhilin 已提交
97 98 99
#ifdef _WIN32
  _aligned_free(p);
#else
100
  free(p);
P
peizhilin 已提交
101
#endif
102 103
}

L
liaogang 已提交
104
bool CPUAllocator::UseGpu() const { return false; }
L
liaogang 已提交
105

106
#ifdef PADDLE_WITH_CUDA
107

108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
// Clears an expected out-of-memory error from both `*status` and CUDA's
// sticky per-thread last-error state, so that later CUDA calls are not
// poisoned by a failed allocation. Any error other than
// cudaErrorMemoryAllocation is unexpected here and is surfaced immediately.
static void ClearCUDAOutOfMemoryError(cudaError_t* status) {
  // OOM is anticipated by the caller (it will report it itself); swallow it.
  if (*status == cudaErrorMemoryAllocation) {
    *status = cudaSuccess;
  }

  PADDLE_ENFORCE_CUDA_SUCCESS(*status);

  // cudaGetLastError() both returns and resets the thread's sticky error,
  // which may still hold the original OOM even after the call above — drain
  // and tolerate that too.
  *status = cudaGetLastError();
  if (*status == cudaErrorMemoryAllocation) {
    *status = cudaSuccess;
  }

  PADDLE_ENFORCE_CUDA_SUCCESS(*status);
}

Y
Yi Wang 已提交
123
void* GPUAllocator::Alloc(size_t* index, size_t size) {
124 125
  // CUDA documentation doesn't explain if cudaMalloc returns nullptr
  // if size is 0.  We just make sure it does.
L
liaogang 已提交
126
  if (size <= 0) return nullptr;
Y
Yu Yang 已提交
127

128
  paddle::platform::CUDADeviceGuard guard(gpu_id_);
Y
Yu Yang 已提交
129

130 131
  void* p;
  cudaError_t result = cudaMalloc(&p, size);
Y
Yu Yang 已提交
132

L
liaogang 已提交
133
  if (result == cudaSuccess) {
Y
Yi Wang 已提交
134
    *index = 0;
135
    gpu_alloc_size_ += size;
L
liaogang 已提交
136
    return p;
137
  } else {
138 139 140 141 142 143 144 145 146 147 148 149 150
    ClearCUDAOutOfMemoryError(&result);

    /**
     * NOTE(zjl): Sometimes cudaMemGetInfo would raise OOM error
     * if there is very little GPU memory left. In this case, we
     * should consider the available GPU memory to be 0, and throw
     * exception inside this function instead of throwing exception
     * inside cudaMemGetInfo.
     */
    size_t avail = 0, total = 0;
    result = cudaMemGetInfo(&avail, &total);
    if (result != cudaSuccess) avail = 0;
    ClearCUDAOutOfMemoryError(&result);
151

152
    PADDLE_THROW_BAD_ALLOC(
153 154 155 156 157 158 159 160 161 162 163 164 165 166
        "\n\nOut of memory error on GPU %d. "
        "Cannot allocate %s memory on GPU %d, "
        "available memory is only %s.\n\n"
        "Please check whether there is any other process using GPU %d.\n"
        "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
        "2. If no, please try one of the following suggestions:\n"
        "   1) Decrease the batch size of your model.\n"
        "   2) FLAGS_fraction_of_gpu_memory_to_use is %.2lf now, "
        "please set it to a higher value but less than 1.0.\n"
        "      The command is "
        "`export FLAGS_fraction_of_gpu_memory_to_use=xxx`.\n\n",
        gpu_id_, string::HumanReadableSize(size), gpu_id_,
        string::HumanReadableSize(avail), gpu_id_,
        FLAGS_fraction_of_gpu_memory_to_use);
L
liaogang 已提交
167
  }
168 169
}

L
liaogang 已提交
170
void GPUAllocator::Free(void* p, size_t size, size_t index) {
171
  cudaError_t err;
172 173 174 175
  PADDLE_ENFORCE_EQ(index, 0);
  PADDLE_ENFORCE_GE(gpu_alloc_size_, size);
  gpu_alloc_size_ -= size;
  err = cudaFree(p);
176

177 178 179 180 181 182
  // Purposefully allow cudaErrorCudartUnloading, because
  // that is returned if you ever call cudaFree after the
  // driver has already shutdown. This happens only if the
  // process is terminating, in which case we don't care if
  // cudaFree succeeds.
  if (err != cudaErrorCudartUnloading) {
L
liaogang 已提交
183
    PADDLE_ENFORCE(err, "cudaFree{Host} failed in GPUAllocator::Free.");
184 185 186
  }
}

L
liaogang 已提交
187
bool GPUAllocator::UseGpu() const { return true; }
L
liaogang 已提交
188

C
chengduoZH 已提交
189 190
// PINNED memory allows direct DMA transfers by the GPU to and from system
// memory. It’s locked to a physical address.
Y
Yi Wang 已提交
191
void* CUDAPinnedAllocator::Alloc(size_t* index, size_t size) {
C
chengduoZH 已提交
192
  if (size <= 0) return nullptr;
C
chengduoZH 已提交
193

194
  // NOTE: here, we use CUDAPinnedMaxAllocSize as the maximum memory size
C
chengduoZH 已提交
195
  // of host pinned allocation. Allocates too much would reduce
C
chengduoZH 已提交
196
  // the amount of memory available to the underlying system for paging.
C
chengduoZH 已提交
197
  size_t usable =
198
      paddle::platform::CUDAPinnedMaxAllocSize() - cuda_pinnd_alloc_size_;
C
chengduoZH 已提交
199

C
chengduoZH 已提交
200 201 202 203 204 205
  if (size > usable) {
    LOG(WARNING) << "Cannot malloc " << size / 1024.0 / 1024.0
                 << " MB pinned memory."
                 << ", available " << usable / 1024.0 / 1024.0 << " MB";
    return nullptr;
  }
C
chengduoZH 已提交
206

C
chengduoZH 已提交
207
  void* p;
C
chengduoZH 已提交
208
  // PINNED memory is visible to all CUDA contexts.
D
Dun Liang 已提交
209
  cudaError_t result = cudaHostAlloc(&p, size, cudaHostAllocPortable);
C
chengduoZH 已提交
210

C
chengduoZH 已提交
211
  if (result == cudaSuccess) {
Y
Yi Wang 已提交
212
    *index = 1;  // PINNED memory
C
chengduoZH 已提交
213
    cuda_pinnd_alloc_size_ += size;
C
chengduoZH 已提交
214
    return p;
C
chengduoZH 已提交
215
  } else {
D
Dun Liang 已提交
216
    LOG(WARNING) << "cudaHostAlloc failed.";
C
chengduoZH 已提交
217
    return nullptr;
C
chengduoZH 已提交
218 219 220 221 222 223 224
  }

  return nullptr;
}

void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
  cudaError_t err;
225
  PADDLE_ENFORCE_EQ(index, 1);
C
chengduoZH 已提交
226

227
  PADDLE_ENFORCE_GE(cuda_pinnd_alloc_size_, size);
C
chengduoZH 已提交
228
  cuda_pinnd_alloc_size_ -= size;
C
chengduoZH 已提交
229 230 231
  err = cudaFreeHost(p);

  // Purposefully allow cudaErrorCudartUnloading, because
C
chengduoZH 已提交
232
  // that is returned if you ever call cudaFreeHost after the
C
chengduoZH 已提交
233 234
  // driver has already shutdown. This happens only if the
  // process is terminating, in which case we don't care if
C
chengduoZH 已提交
235
  // cudaFreeHost succeeds.
C
chengduoZH 已提交
236 237 238 239 240
  if (err != cudaErrorCudartUnloading) {
    PADDLE_ENFORCE(err, "cudaFreeHost failed in GPUPinnedAllocator::Free.");
  }
}

C
chengduoZH 已提交
241
bool CUDAPinnedAllocator::UseGpu() const { return false; }
C
chengduoZH 已提交
242

L
Luo Tao 已提交
243
#endif
244 245 246 247

}  // namespace detail
}  // namespace memory
}  // namespace paddle