/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

// Query/管理 helpers for CUDA and ROCm devices; the whole header is a no-op
// unless the build enables one of the two GPU backends.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

#include <stddef.h>

#include <array>
#include <cstdint>  // uint64_t (RecordedGpuMallocSize / RecordedGpuLimitSize)
#include <string>
#include <vector>

#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/phi/backends/gpu/gpu_info.h"

namespace paddle {
namespace platform {

//! Get the version of dnn
int DnnVersion();

//! Get the total number of GPU devices in system.
int GetGPUDeviceCount();

//! Get the compute capability of the ith GPU (format: major * 10 + minor)
int GetGPUComputeCapability(int id);

//! Get the runtime version of the ith GPU
int GetGPURuntimeVersion(int id);

//! Get the driver version of the ith GPU
int GetGPUDriverVersion(int id);

//! Whether the current device support TensorCore
bool TensorCoreAvailable();

//! Get the MultiProcessors of the ith GPU.
int GetGPUMultiProcessors(int id);

//! Get the MaxThreads of each MultiProcessor of the ith GPU.
int GetGPUMaxThreadsPerMultiProcessor(int id);

//! Get the MaxThreads of each block of the ith GPU.
int GetGPUMaxThreadsPerBlock(int id);

//! Get the current GPU device id in system.
int GetCurrentDeviceId();

//! Get the maximum GridDim size for GPU buddy allocator.
std::array<int, 3> GetGpuMaxGridDimSize(int);

//! Get a list of device ids from environment variable or use all.
std::vector<int> GetSelectedDevices();

//! Get the properties of the ith GPU device.
const gpuDeviceProp &GetDeviceProperties(int id);

//! Set the GPU device id for next execution.
void SetDeviceId(int device_id);

//! Get the memory usage of current GPU device.
void GpuMemoryUsage(size_t *available, size_t *total);

//! Get the available memory to allocate, which is the size of available gpu
//! minus reserving.
size_t GpuAvailableMemToAlloc();

//! Get the maximum allocation size of current GPU device.
size_t GpuMaxAllocSize();

//! Get the initial allocation size of current GPU device.
size_t GpuInitAllocSize();

//! Get the re-allocation size of current GPU device.
size_t GpuReallocSize();

//! Minimum chunk size for the GPU buddy allocator (re-exported from phi).
using phi::backends::gpu::GpuMinChunkSize;

//! Get the maximum chunk size for GPU buddy allocator.
size_t GpuMaxChunkSize();

//! Copy memory from address src to dst asynchronously.
void GpuMemcpyAsync(void *dst,
                    const void *src,
                    size_t count,
                    gpuMemcpyKind kind,
                    gpuStream_t stream);

//! Copy memory from address src to dst synchronously.
void GpuMemcpySync(void *dst,
                   const void *src,
                   size_t count,
                   gpuMemcpyKind kind);

//! Copy memory from one device to another device asynchronously.
void GpuMemcpyPeerAsync(void *dst,
                        int dst_device,
                        const void *src,
                        int src_device,
                        size_t count,
                        gpuStream_t stream);

//! Copy memory from one device to another device synchronously.
void GpuMemcpyPeerSync(
    void *dst, int dst_device, const void *src, int src_device, size_t count);

//! Set memory dst with value count size asynchronously
void GpuMemsetAsync(void *dst, int value, size_t count, gpuStream_t stream);

//! Blocks until stream has completed all operations.
void GpuStreamSync(gpuStream_t stream);

//! Destroy the given GPU stream.
void GpuDestroyStream(gpuStream_t stream);

//! Blocks until device has completed all operations.
void GpuDeviceSync();

//! CudaMalloc with recorded info
gpuError_t RecordedGpuMalloc(void **ptr,
                             size_t size,
                             int dev_id,
                             bool malloc_managed_memory = false);

//! CudaFree with recorded info
void RecordedGpuFree(void *p, size_t size, int dev_id);

//! Return the last error produced by any runtime call on this device.
gpuError_t GpuGetLastError();

#ifdef PADDLE_WITH_CUDA
#if CUDA_VERSION >= 10020
//! cuMemCreate with recorded info
CUresult RecordedGpuMemCreate(CUmemGenericAllocationHandle *handle,
                              size_t size,
                              const CUmemAllocationProp *prop,
                              unsigned long long flags,  // NOLINT
                              int dev_id);

//! cuMemRelease with recorded info
CUresult RecordedGpuMemRelease(CUmemGenericAllocationHandle handle,
                               size_t size,
                               int dev_id);
#endif
#endif

//! Get available and total gpu memory with considering limitation
bool RecordedGpuMemGetInfo(size_t *avail,
                           size_t *total,
                           size_t *actual_avail,
                           size_t *actual_total,
                           int dev_id);

//! Get recorded cudaMalloc size. If record is disabled, return 0.
uint64_t RecordedGpuMallocSize(int dev_id);

//! Get the configured allocation limit for the given device.
uint64_t RecordedGpuLimitSize(int dev_id);

//! Whether allocation recording is enabled for the given device.
bool IsGpuMallocRecorded(int dev_id);

//! Empty idle cached memory held by the allocator.
void EmptyCache(void);

//! Whether the given device supports CUDA/HIP managed memory.
bool IsGPUManagedMemorySupported(int dev_id);

//! Whether the given device supports managed-memory oversubscription.
bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id);

//! Get the primitive pointer return from cudaMalloc, just implemented with
//! testing, do not use for release
void *GetGpuBasePtr(void *ptr, int dev_id);

}  // namespace platform
}  // namespace paddle

#endif