/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/memory/detail/system_allocator.h"
#ifdef _WIN32
#include <malloc.h>
#include <windows.h>  // VirtualLock/VirtualUnlock
#else
#include <sys/mman.h>  // for mlock and munlock
#endif
#include <stdlib.h>   // for malloc and free
#include <algorithm>  // for std::max

#include "gflags/gflags.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"

// If use_pinned_memory is true, CPUAllocator calls mlock, which
// returns pinned and locked memory as staging areas for data exchange
// between host and device.  Allocates too much would reduce the amount
// of memory available to the system for paging.  So, by default, we
// should set false to use_pinned_memory.
DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
DECLARE_double(fraction_of_gpu_memory_to_use);
namespace paddle {
namespace memory {
namespace detail {

void* AlignedMalloc(size_t size) {
G
gongweibao 已提交
46
  void* p = nullptr;
D
dzhwinter 已提交
47
  size_t alignment = 32ul;
T
tensor-tang 已提交
48
#ifdef PADDLE_WITH_MKLDNN
49 50
  // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
  // memory alignment
D
dzhwinter 已提交
51 52 53 54
  alignment = 4096ul;
#endif
#ifdef _WIN32
  p = _aligned_malloc(size, alignment);
55
#else
D
dzhwinter 已提交
56
  PADDLE_ENFORCE_EQ(posix_memalign(&p, alignment, size), 0, "Alloc %ld error!",
G
gongweibao 已提交
57
                    size);
58 59
#endif
  PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size);
D
dzhwinter 已提交
60 61 62 63 64 65 66 67 68 69 70 71
  return p;
}

void* CPUAllocator::Alloc(size_t* index, size_t size) {
  // According to http://www.cplusplus.com/reference/cstdlib/malloc/,
  // malloc might not return nullptr if size is zero, but the returned
  // pointer shall not be dereferenced -- so we make it nullptr.
  if (size <= 0) return nullptr;

  *index = 0;  // unlock memory

  void* p = AlignedMalloc(size);
72 73 74

  if (p != nullptr) {
    if (FLAGS_use_pinned_memory) {
Y
Yi Wang 已提交
75
      *index = 1;
D
dzhwinter 已提交
76 77 78
#ifdef _WIN32
      VirtualLock(p, size);
#else
79
      mlock(p, size);  // lock memory
D
dzhwinter 已提交
80
#endif
81
    }
82
  }
83

84 85 86
  return p;
}

void CPUAllocator::Free(void* p, size_t size, size_t index) {
88
  if (p != nullptr && index == 1) {
D
dzhwinter 已提交
89 90 91
#ifdef _WIN32
    VirtualUnlock(p, size);
#else
92
    munlock(p, size);
D
dzhwinter 已提交
93
#endif
94 95 96 97
  }
  free(p);
}

bool CPUAllocator::UseGpu() const {
  // Host-side allocator: its memory never resides on a GPU.
  return false;
}
#ifdef PADDLE_WITH_CUDA
void* GPUAllocator::Alloc(size_t* index, size_t size) {
103 104
  // CUDA documentation doesn't explain if cudaMalloc returns nullptr
  // if size is 0.  We just make sure it does.
L
liaogang 已提交
105
  if (size <= 0) return nullptr;
106
  void* p;
Y
Yu Yang 已提交
107 108 109 110 111 112
  int prev_id;
  cudaGetDevice(&prev_id);
  if (prev_id != gpu_id_) {
    cudaSetDevice(gpu_id_);
  }

113
  cudaError_t result = cudaMalloc(&p, size);
Y
Yu Yang 已提交
114 115 116 117 118

  if (prev_id != gpu_id_) {
    cudaSetDevice(prev_id);
  }

L
liaogang 已提交
119
  if (result == cudaSuccess) {
Y
Yi Wang 已提交
120
    *index = 0;
121
    gpu_alloc_size_ += size;
L
liaogang 已提交
122
    return p;
123 124 125 126 127 128 129
  } else {
    LOG(WARNING)
        << "Cannot malloc " << size / 1024.0 / 1024.0
        << " MB GPU memory. Please shrink FLAGS_fraction_of_gpu_memory_to_use "
           "environment variable to a lower value. Current value is "
        << FLAGS_fraction_of_gpu_memory_to_use;
    return nullptr;
L
liaogang 已提交
130
  }
131 132
}

void GPUAllocator::Free(void* p, size_t size, size_t index) {
134 135 136 137 138 139 140 141 142 143 144 145
  cudaError_t err;

  if (index == 0) {
    PADDLE_ASSERT(gpu_alloc_size_ >= size);
    gpu_alloc_size_ -= size;
    err = cudaFree(p);
  } else {
    PADDLE_ASSERT(fallback_alloc_size_ >= size);
    fallback_alloc_size_ -= size;
    err = cudaFreeHost(p);
  }

146 147 148 149 150 151
  // Purposefully allow cudaErrorCudartUnloading, because
  // that is returned if you ever call cudaFree after the
  // driver has already shutdown. This happens only if the
  // process is terminating, in which case we don't care if
  // cudaFree succeeds.
  if (err != cudaErrorCudartUnloading) {
L
liaogang 已提交
152
    PADDLE_ENFORCE(err, "cudaFree{Host} failed in GPUAllocator::Free.");
153 154 155
  }
}

bool GPUAllocator::UseGpu() const {
  // Memory handed out by this allocator lives on the device.
  return true;
}

// PINNED memory allows direct DMA transfers by the GPU to and from system
// memory. It's locked to a physical address.
void* CUDAPinnedAllocator::Alloc(size_t* index, size_t size) {
C
chengduoZH 已提交
161
  if (size <= 0) return nullptr;
C
chengduoZH 已提交
162

163
  // NOTE: here, we use CUDAPinnedMaxAllocSize as the maximum memory size
C
chengduoZH 已提交
164
  // of host pinned allocation. Allocates too much would reduce
C
chengduoZH 已提交
165
  // the amount of memory available to the underlying system for paging.
C
chengduoZH 已提交
166
  size_t usable =
167
      paddle::platform::CUDAPinnedMaxAllocSize() - cuda_pinnd_alloc_size_;
C
chengduoZH 已提交
168

C
chengduoZH 已提交
169 170 171 172 173 174
  if (size > usable) {
    LOG(WARNING) << "Cannot malloc " << size / 1024.0 / 1024.0
                 << " MB pinned memory."
                 << ", available " << usable / 1024.0 / 1024.0 << " MB";
    return nullptr;
  }
C
chengduoZH 已提交
175

C
chengduoZH 已提交
176
  void* p;
C
chengduoZH 已提交
177
  // PINNED memory is visible to all CUDA contexts.
C
chengduoZH 已提交
178
  cudaError_t result = cudaMallocHost(&p, size);
C
chengduoZH 已提交
179

C
chengduoZH 已提交
180
  if (result == cudaSuccess) {
Y
Yi Wang 已提交
181
    *index = 1;  // PINNED memory
C
chengduoZH 已提交
182
    cuda_pinnd_alloc_size_ += size;
C
chengduoZH 已提交
183
    return p;
C
chengduoZH 已提交
184 185 186
  } else {
    LOG(WARNING) << "cudaMallocHost failed.";
    return nullptr;
C
chengduoZH 已提交
187 188 189 190 191 192 193 194 195
  }

  return nullptr;
}

void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
  cudaError_t err;
  PADDLE_ASSERT(index == 1);

C
chengduoZH 已提交
196 197
  PADDLE_ASSERT(cuda_pinnd_alloc_size_ >= size);
  cuda_pinnd_alloc_size_ -= size;
C
chengduoZH 已提交
198 199 200
  err = cudaFreeHost(p);

  // Purposefully allow cudaErrorCudartUnloading, because
C
chengduoZH 已提交
201
  // that is returned if you ever call cudaFreeHost after the
C
chengduoZH 已提交
202 203
  // driver has already shutdown. This happens only if the
  // process is terminating, in which case we don't care if
C
chengduoZH 已提交
204
  // cudaFreeHost succeeds.
C
chengduoZH 已提交
205 206 207 208 209
  if (err != cudaErrorCudartUnloading) {
    PADDLE_ENFORCE(err, "cudaFreeHost failed in GPUPinnedAllocator::Free.");
  }
}

bool CUDAPinnedAllocator::UseGpu() const {
  // Pinned memory is host memory (DMA-visible to the GPU), not device memory.
  return false;
}
#endif

}  // namespace detail
}  // namespace memory
}  // namespace paddle