/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

#include <stddef.h>

#include <array>
#include <string>
#include <vector>

#include "paddle/fluid/platform/device/gpu/gpu_types.h"

namespace paddle {
namespace platform {
25 26
//! Get the version of dnn
int DnnVersion();
L
liaogang 已提交
27 28

//! Get the total number of GPU devices in system.
29
int GetGPUDeviceCount();
L
liaogang 已提交
30

31
//! Get the compute capability of the ith GPU (format: major * 10 + minor)
32
int GetGPUComputeCapability(int id);
33

C
chengduo 已提交
34
//! Get the runtime version of the ith GPU
35
int GetGPURuntimeVersion(int id);
C
chengduo 已提交
36 37

//! Get the driver version of the ith GPU
38
int GetGPUDriverVersion(int id);
C
chengduo 已提交
39

40 41 42
//! Wheter the current device support TensorCore
bool TensorCoreAvailable();

C
chengduoZH 已提交
43
//! Get the MultiProcessors of the ith GPU.
44
int GetGPUMultiProcessors(int id);
C
chengduoZH 已提交
45 46

//! Get the MaxThreads of each MultiProcessor of the ith GPU.
47
int GetGPUMaxThreadsPerMultiProcessor(int id);
C
chengduoZH 已提交
48

49
//! Get the MaxThreads of each block of the ith GPU.
50
int GetGPUMaxThreadsPerBlock(int id);
51

L
liaogang 已提交
52 53 54
//! Get the current GPU device id in system.
int GetCurrentDeviceId();

55
//! Get the maximum GridDim size for GPU buddy allocator.
W
Wilber 已提交
56
std::array<int, 3> GetGpuMaxGridDimSize(int);
57

58 59 60
//! Get a list of device ids from environment variable or use all.
std::vector<int> GetSelectedDevices();

61 62 63
//! Get the properties of the ith GPU device.
const gpuDeviceProp &GetDeviceProperties(int id);

L
liaogang 已提交
64 65 66
//! Set the GPU device id for next execution.
void SetDeviceId(int device_id);

Q
Qiao Longfei 已提交
67
//! Get the memory usage of current GPU device.
68
void GpuMemoryUsage(size_t *available, size_t *total);
L
liaogang 已提交
69

70 71 72 73
//! Get the available memory to allocate, which is the size of available gpu
//! minus reserving.
size_t GpuAvailableMemToAlloc();

L
liaogang 已提交
74 75 76
//! Get the maximum allocation size of current GPU device.
size_t GpuMaxAllocSize();

Z
zhhsplendid 已提交
77 78 79 80 81 82
//! Get the initial allocation size of current GPU device.
size_t GpuInitAllocSize();

//! Get the re-allocation size of current GPU device.
size_t GpuReallocSize();

L
liaogang 已提交
83 84 85 86 87 88
//! Get the minimum chunk size for GPU buddy allocator.
size_t GpuMinChunkSize();

//! Get the maximum chunk size for GPU buddy allocator.
size_t GpuMaxChunkSize();

L
liaogang 已提交
89 90
//! Copy memory from address src to dst asynchronously.
void GpuMemcpyAsync(void *dst, const void *src, size_t count,
91
                    gpuMemcpyKind kind, gpuStream_t stream);
L
liaogang 已提交
92

93 94
//! Copy memory from address src to dst synchronously.
void GpuMemcpySync(void *dst, const void *src, size_t count,
95
                   gpuMemcpyKind kind);
96 97 98

//! Copy memory from one device to another device asynchronously.
void GpuMemcpyPeerAsync(void *dst, int dst_device, const void *src,
99
                        int src_device, size_t count, gpuStream_t stream);
100 101 102 103

//! Copy memory from one device to another device synchronously.
void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
                       int src_device, size_t count);
L
liaogang 已提交
104

D
dzhwinter 已提交
105
//! Set memory dst with value count size asynchronously
106
void GpuMemsetAsync(void *dst, int value, size_t count, gpuStream_t stream);
D
dzhwinter 已提交
107

石晓伟 已提交
108
//! Blocks until stream has completed all operations.
109
void GpuStreamSync(gpuStream_t stream);
石晓伟 已提交
110

111 112 113
void GpuDestroyStream(gpuStream_t stream);

// ! Blocks until device has completed all operations.
W
Wilber 已提交
114
void GpuDeviceSync();
115

116
//! CudaMalloc with recorded info
117 118
gpuError_t RecordedGpuMalloc(void **ptr, size_t size, int dev_id,
                             bool malloc_managed_memory = false);
119 120

//! CudaFree with recorded info
121 122 123
void RecordedGpuFree(void *p, size_t size, int dev_id);

gpuError_t GpuGetLastError();
124

125 126 127
#ifdef PADDLE_WITH_CUDA
#if CUDA_VERSION >= 10020
//! cuMemCreate with recorded info
128 129 130
CUresult RecordedGpuMemCreate(CUmemGenericAllocationHandle *handle, size_t size,
                              const CUmemAllocationProp *prop,
                              unsigned long long flags, int dev_id);  // NOLINT
131 132

//! cuMemRelease with recorded info
133 134
CUresult RecordedGpuMemRelease(CUmemGenericAllocationHandle handle, size_t size,
                               int dev_id);
135 136 137
#endif
#endif

138
//! Get available and total gpu memory with considering limitation
139 140
bool RecordedGpuMemGetInfo(size_t *avail, size_t *total, size_t *actual_avail,
                           size_t *actual_total, int dev_id);
141 142

//! Get recorded cudaMalloc size. If record is disabled, return 0.
143
uint64_t RecordedGpuMallocSize(int dev_id);
144

145 146
uint64_t RecordedGpuLimitSize(int dev_id);

147
bool IsGpuMallocRecorded(int dev_id);
148

149 150 151
//! Empty idle cached memory held by the allocator.
void EmptyCache(void);

152 153 154 155
bool IsGPUManagedMemorySupported(int dev_id);

bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id);

F
From00 已提交
156 157
//! Get the primitive pointer return from cudaMalloc, just implemented with
//! testing, do not use for release
F
From00 已提交
158 159
void *GetGpuBasePtr(void *ptr, int dev_id);

L
liaogang 已提交
160 161 162
}  // namespace platform
}  // namespace paddle

#endif