/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

// This header is only meaningful when Paddle is built with a GPU backend
// (NVIDIA CUDA or AMD ROCm/HIP); otherwise it expands to nothing.
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

#include <stddef.h>

#include <array>
#include <cstdint>  // uint64_t for the Recorded*Size queries below
#include <string>
#include <vector>

#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/phi/backends/gpu/gpu_info.h"

namespace paddle {
namespace platform {

//! Get the version of dnn
int DnnVersion();

//! Get the total number of GPU devices in system.
int GetGPUDeviceCount();

//! Get the compute capability of the ith GPU (format: major * 10 + minor)
int GetGPUComputeCapability(int id);

//! Get the runtime version of the ith GPU
int GetGPURuntimeVersion(int id);

//! Get the driver version of the ith GPU
int GetGPUDriverVersion(int id);

//! Whether the current device support TensorCore
bool TensorCoreAvailable();

//! Get the MultiProcessors of the ith GPU.
int GetGPUMultiProcessors(int id);

//! Get the MaxThreads of each MultiProcessor of the ith GPU.
int GetGPUMaxThreadsPerMultiProcessor(int id);

//! Get the MaxThreads of each block of the ith GPU.
int GetGPUMaxThreadsPerBlock(int id);

//! Get the current GPU device id in system.
int GetCurrentDeviceId();

//! Get the maximum GridDim size for GPU buddy allocator.
std::array<int, 3> GetGpuMaxGridDimSize(int);

//! Get a list of device ids from environment variable or use all.
std::vector<int> GetSelectedDevices();

//! Get the properties of the ith GPU device.
const gpuDeviceProp &GetDeviceProperties(int id);

//! Set the GPU device id for next execution.
void SetDeviceId(int device_id);

//! Get the memory usage of current GPU device.
void GpuMemoryUsage(size_t *available, size_t *total);

//! Get the available memory to allocate, which is the size of available gpu
//! minus reserving.
size_t GpuAvailableMemToAlloc();

//! Get the maximum allocation size of current GPU device.
size_t GpuMaxAllocSize();

//! Get the initial allocation size of current GPU device.
size_t GpuInitAllocSize();

//! Get the re-allocation size of current GPU device.
size_t GpuReallocSize();

//! Get the minimum chunk size for GPU buddy allocator.
size_t GpuMinChunkSize();

//! Get the maximum chunk size for GPU buddy allocator.
size_t GpuMaxChunkSize();

//! Copy memory from address src to dst asynchronously.
void GpuMemcpyAsync(void *dst,
                    const void *src,
                    size_t count,
                    gpuMemcpyKind kind,
                    gpuStream_t stream);

//! Copy memory from address src to dst synchronously.
void GpuMemcpySync(void *dst,
                   const void *src,
                   size_t count,
                   gpuMemcpyKind kind);

//! Copy memory from one device to another device asynchronously.
void GpuMemcpyPeerAsync(void *dst,
                        int dst_device,
                        const void *src,
                        int src_device,
                        size_t count,
                        gpuStream_t stream);

//! Copy memory from one device to another device synchronously.
void GpuMemcpyPeerSync(
    void *dst, int dst_device, const void *src, int src_device, size_t count);

//! Set memory dst with value count size asynchronously
void GpuMemsetAsync(void *dst, int value, size_t count, gpuStream_t stream);

//! Blocks until stream has completed all operations.
void GpuStreamSync(gpuStream_t stream);

//! Destroy the given GPU stream.
void GpuDestroyStream(gpuStream_t stream);

//! Blocks until device has completed all operations.
void GpuDeviceSync();

//! CudaMalloc with recorded info
gpuError_t RecordedGpuMalloc(void **ptr,
                             size_t size,
                             int dev_id,
                             bool malloc_managed_memory = false);

//! CudaFree with recorded info
void RecordedGpuFree(void *p, size_t size, int dev_id);

//! Return the last error produced by a GPU runtime call.
gpuError_t GpuGetLastError();

#ifdef PADDLE_WITH_CUDA
// cuMemCreate/cuMemRelease (CUDA virtual memory management) are only
// available since CUDA 10.2.
#if CUDA_VERSION >= 10020
//! cuMemCreate with recorded info
CUresult RecordedGpuMemCreate(CUmemGenericAllocationHandle *handle,
                              size_t size,
                              const CUmemAllocationProp *prop,
                              unsigned long long flags,  // NOLINT
                              int dev_id);

//! cuMemRelease with recorded info
CUresult RecordedGpuMemRelease(CUmemGenericAllocationHandle handle,
                               size_t size,
                               int dev_id);
#endif
#endif

//! Get available and total gpu memory with considering limitation
bool RecordedGpuMemGetInfo(size_t *avail,
                           size_t *total,
                           size_t *actual_avail,
                           size_t *actual_total,
                           int dev_id);

//! Get recorded cudaMalloc size. If record is disabled, return 0.
uint64_t RecordedGpuMallocSize(int dev_id);

//! NOTE(review): presumably the configured memory limit used by the
//! recorded-malloc bookkeeping for dev_id — confirm against the definition.
uint64_t RecordedGpuLimitSize(int dev_id);

//! Whether allocation recording is active for dev_id.
bool IsGpuMallocRecorded(int dev_id);

//! Empty idle cached memory held by the allocator.
void EmptyCache(void);

//! Whether the ith GPU supports managed (unified) memory.
bool IsGPUManagedMemorySupported(int dev_id);

//! Whether the ith GPU supports oversubscribing managed memory.
bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id);

//! Get the primitive pointer return from cudaMalloc, just implemented with
//! testing, do not use for release
void *GetGpuBasePtr(void *ptr, int dev_id);

}  // namespace platform
}  // namespace paddle

#endif