/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <stddef.h>    // for size_t
#include <sys/mman.h>  // for mlock and munlock
#include <cstdlib>     // for malloc and free

#include <gflags/gflags.h>
#include "paddle/platform/assert.h"
#include "paddle/platform/cuda.h"

DEFINE_bool(uses_pinned_memory, false,
            "If set, allocate CPU/GPU pinned memory.");

namespace paddle {
namespace memory {
namespace detail {

// If uses_pinned_memory is true, CPUAllocator calls mlock, which
// returns pinned and locked memory as staging areas for data exchange
// between host and device.  Allocating too much pinned memory reduces
// the amount of memory available to the system for paging, so
// uses_pinned_memory defaults to false.
class CPUAllocator {
 public:
  static void* Alloc(size_t size) {
    void* p = std::malloc(size);
    if (p != nullptr && FLAGS_uses_pinned_memory) {
      mlock(p, size);
    }
    return p;
  }

  static void Free(void* p, size_t size) {
    if (p != nullptr && FLAGS_uses_pinned_memory) {
      munlock(p, size);
    }
    std::free(p);
  }
};
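
// A minimal usage sketch (illustrative, not part of the allocator API):
//
//   void* buf = CPUAllocator::Alloc(1024);
//   if (buf != nullptr) {
//     // ... stage data in buf ...
//     CPUAllocator::Free(buf, 1024);  // size must match the Alloc call
//   }
//
// Free takes the allocation size so that munlock can undo the mlock
// performed by Alloc when uses_pinned_memory is set.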

#ifndef PADDLE_ONLY_CPU  // The following code is for CUDA.

// If uses_pinned_memory is true, GPUAllocator calls cudaMallocHost,
// which returns pinned and locked memory as staging areas for data
// exchange between host and device.  Allocating too much pinned memory
// reduces the amount of memory available to the system for paging, so
// uses_pinned_memory defaults to false.
class GPUAllocator {
 public:
  static void* Alloc(size_t size) {
    void* p = nullptr;
    cudaError_t result = FLAGS_uses_pinned_memory ? cudaMallocHost(&p, size)
                                                  : cudaMalloc(&p, size);
    if (result != cudaSuccess) {
      cudaGetLastError();  // clear error if there is any.
    }
    return result == cudaSuccess ? p : nullptr;
  }

  static void Free(void* p, size_t size) {
    // Purposefully allow cudaErrorCudartUnloading, because
    // that is returned if you ever call cudaFree after the
    // driver has already shut down. This happens only if the
    // process is terminating, in which case we don't care if
    // cudaFree succeeds.
    cudaError_t err = FLAGS_uses_pinned_memory ? cudaFreeHost(p) : cudaFree(p);
    if (err != cudaErrorCudartUnloading) {
      platform::throw_on_error(err, "cudaFree{Host} failed");
    }
  }
};
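
// A minimal usage sketch (illustrative, not part of the allocator API):
//
//   void* buf = GPUAllocator::Alloc(1024);  // device or pinned host memory
//   if (buf != nullptr) {
//     // ... use the buffer ...
//     GPUAllocator::Free(buf, 1024);
//   }
//
// Alloc returns nullptr on failure instead of throwing, so callers must
// check the result before use.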

#endif  // PADDLE_ONLY_CPU

}  // namespace detail
}  // namespace memory
}  // namespace paddle