/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES

#include "paddle/fluid/memory/detail/system_allocator.h"

#ifdef _WIN32
#include <malloc.h>
#ifndef NOMINMAX
#define NOMINMAX  // msvc's max/min macros conflict with std::min/max
#endif
#include <windows.h>  // VirtualLock/VirtualUnlock
#else
#include <sys/mman.h>  // for mlock and munlock
#endif
#include "gflags/gflags.h"
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif

DECLARE_bool(use_pinned_memory);
DECLARE_double(fraction_of_gpu_memory_to_use);
DECLARE_uint64(initial_gpu_memory_in_mb);
DECLARE_uint64(reallocate_gpu_memory_in_mb);

namespace paddle {
namespace memory {
namespace detail {
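
// Each allocator below implements the system allocator interface declared in
// system_allocator.h: Alloc() returns a raw pointer and reports, through the
// out-parameter `index`, which kind of memory was produced (e.g. page-locked
// vs. pageable); the same size and index must be passed back to Free().
// A minimal usage sketch, for illustration only:
//
//   size_t index = 0;
//   void* p = allocator.Alloc(&index, 1 << 20);  // request 1 MiB
//   if (p != nullptr) {
//     // ... use the buffer ...
//     allocator.Free(p, 1 << 20, index);  // same size and index as Alloc()
//   }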

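// Allocates `size` bytes aligned to 32 bytes (4096 bytes when built with
// MKL-DNN), using _aligned_malloc on Windows and posix_memalign elsewhere.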
void* AlignedMalloc(size_t size) {
  void* p = nullptr;
  size_t alignment = 32ul;
#ifdef PADDLE_WITH_MKLDNN
  // MKL-DNN requires 4096-byte aligned memory; refer to
  // https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
  alignment = 4096ul;
#endif
#ifdef _WIN32
  p = _aligned_malloc(size, alignment);
#else
  int error = posix_memalign(&p, alignment, size);
  PADDLE_ENFORCE_EQ(
      error, 0,
      platform::errors::ResourceExhausted(
          "Fail to alloc memory of %ld size, error code is %d.", size, error));
#endif
  PADDLE_ENFORCE_NOT_NULL(
      p, platform::errors::ResourceExhausted(
             "Failed to allocate %ld bytes of memory.", size));
  return p;
}

void* CPUAllocator::Alloc(size_t* index, size_t size) {
  // According to http://www.cplusplus.com/reference/cstdlib/malloc/,
  // malloc might not return nullptr if size is zero, but the returned
  // pointer shall not be dereferenced -- so we make it nullptr.
  if (size <= 0) return nullptr;

  *index = 0;  // index 0 means unlocked (pageable) memory

  void* p = AlignedMalloc(size);

  if (p != nullptr) {
    if (FLAGS_use_pinned_memory) {
      *index = 1;  // index 1 means page-locked memory
#ifdef _WIN32
      VirtualLock(p, size);
#else
      mlock(p, size);  // lock memory
#endif
    }
  }

  return p;
}

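// Releases memory obtained from CPUAllocator::Alloc; when index == 1 the
// pages were locked at allocation time and are unlocked before being freed.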
void CPUAllocator::Free(void* p, size_t size, size_t index) {
  if (p != nullptr && index == 1) {
#ifdef _WIN32
    VirtualUnlock(p, size);
#else
    munlock(p, size);
#endif
  }
#ifdef _WIN32
  _aligned_free(p);
#else
  free(p);
#endif
}

bool CPUAllocator::UseGpu() const { return false; }

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

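// Allocates device memory through platform::RecordedCudaMalloc, which tracks
// per-device usage, and throws a detailed ResourceExhausted error on failure.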
void* GPUAllocator::Alloc(size_t* index, size_t size) {
  // CUDA documentation doesn't explain if cudaMalloc returns nullptr
  // if size is 0.  We just make sure it does.
  if (size <= 0) return nullptr;

  void* p;
  auto result = platform::RecordedCudaMalloc(&p, size, gpu_id_);

  if (result == gpuSuccess) {
    *index = 0;  // device memory always uses index 0
    gpu_alloc_size_ += size;
    return p;
  } else {
    size_t avail, total, actual_avail, actual_total;
    bool is_limited = platform::RecordedCudaMemGetInfo(
        &avail, &total, &actual_avail, &actual_total, gpu_id_);

    std::string err_msg;
    if (is_limited) {
      auto limit_size = (total >> 20);
      err_msg = string::Sprintf(
          "\n   3) Set environment variable `FLAGS_gpu_memory_limit_mb` to a "
          "larger value. Currently `FLAGS_gpu_memory_limit_mb` is %d, so the "
          "maximum GPU memory usage is limited to %d MB.\n"
          "      The command is `export FLAGS_gpu_memory_limit_mb=xxx`.",
          limit_size, limit_size);
    }

    PADDLE_THROW_BAD_ALLOC(platform::errors::ResourceExhausted(
        "\n\nOut of memory error on GPU %d. "
        "Cannot allocate %s memory on GPU %d, "
        "available memory is only %s.\n\n"
        "Please check whether there is any other process using GPU %d.\n"
        "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
        "2. If no, please try one of the following suggestions:\n"
        "   1) Decrease the batch size of your model.\n"
        "   2) FLAGS_fraction_of_gpu_memory_to_use is %.2lf now, "
        "please set it to a higher value but less than 1.0.\n"
        "      The command is "
        "`export FLAGS_fraction_of_gpu_memory_to_use=xxx`.%s\n\n",
        gpu_id_, string::HumanReadableSize(size), gpu_id_,
        string::HumanReadableSize(avail), gpu_id_,
        FLAGS_fraction_of_gpu_memory_to_use, err_msg));
  }
}

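// Returns device memory via platform::RecordedCudaFree; `index` must be 0 and
// `size` must not exceed the total recorded in gpu_alloc_size_.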
void GPUAllocator::Free(void* p, size_t size, size_t index) {
  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
                                  "The index should be 0, but got %d", index));
  PADDLE_ENFORCE_GE(gpu_alloc_size_, size,
                    platform::errors::InvalidArgument(
                        "The size of memory (%d) to free exceeds the size of "
                        "allocated gpu memory (%d)",
                        size, gpu_alloc_size_));
  gpu_alloc_size_ -= size;

  platform::RecordedCudaFree(p, size, gpu_id_);
}

bool GPUAllocator::UseGpu() const { return true; }

// PINNED memory allows direct DMA transfers by the GPU to and from system
// memory. It's locked to a physical address.
void* CUDAPinnedAllocator::Alloc(size_t* index, size_t size) {
  if (size <= 0) return nullptr;

  // NOTE: here, we use CUDAPinnedMaxAllocSize as the maximum memory size
  // of host pinned allocation. Allocating too much would reduce
  // the amount of memory available to the underlying system for paging.
  size_t usable =
      paddle::platform::CUDAPinnedMaxAllocSize() - cuda_pinnd_alloc_size_;

  if (size > usable) {
    LOG(WARNING) << "Cannot malloc " << size / 1024.0 / 1024.0
                 << " MB pinned memory, available "
                 << usable / 1024.0 / 1024.0 << " MB";
    return nullptr;
  }

  void* p;
// PINNED memory is visible to all CUDA contexts.
#ifdef PADDLE_WITH_HIP
  hipError_t result = hipHostMalloc(&p, size);
#else
  cudaError_t result = cudaHostAlloc(&p, size, cudaHostAllocPortable);
#endif

  if (result == gpuSuccess) {
    *index = 1;  // PINNED memory
    cuda_pinnd_alloc_size_ += size;
    return p;
  } else {
    LOG(WARNING) << "cudaHostAlloc/hipHostMalloc failed.";
    return nullptr;
  }
}

// Releases pinned host memory obtained from Alloc() above; `index` must be 1.
void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
  gpuError_t err;
  PADDLE_ENFORCE_EQ(index, 1, platform::errors::InvalidArgument(
                                  "The index should be 1, but got %d", index));

  PADDLE_ENFORCE_GE(cuda_pinnd_alloc_size_, size,
                    platform::errors::InvalidArgument(
                        "The size of memory (%d) to free exceeds the size of "
                        "allocated cuda pinned memory (%d)",
                        size, cuda_pinnd_alloc_size_));
  cuda_pinnd_alloc_size_ -= size;
#ifdef PADDLE_WITH_HIP
  err = hipHostFree(p);
  if (err != hipErrorDeinitialized) {
    PADDLE_ENFORCE_EQ(
        err, hipSuccess,
        platform::errors::Fatal(
            "hipFreeHost failed in GPUPinnedAllocator, error code is %d", err));
  }
#else
  err = cudaFreeHost(p);

  // Purposefully allow cudaErrorCudartUnloading, because
  // that is returned if you ever call cudaFreeHost after the
  // driver has already shutdown. This happens only if the
  // process is terminating, in which case we don't care if
  // cudaFreeHost succeeds.
  if (err != cudaErrorCudartUnloading) {
    PADDLE_ENFORCE_EQ(
        err, cudaSuccess,
        platform::errors::Fatal(
            "cudaFreeHost failed in GPUPinnedAllocator, error code is %d",
            err));
  }
#endif
}

bool CUDAPinnedAllocator::UseGpu() const { return false; }

#endif

}  // namespace detail
}  // namespace memory
}  // namespace paddle