/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

#include <stddef.h>

#include <array>
#include <string>
#include <vector>

#include "paddle/fluid/platform/device/gpu/gpu_types.h"

namespace paddle {
namespace platform {
//! Get the version of the DNN library (cuDNN on CUDA, MIOpen on HIP)
int DnnVersion();

//! Get the total number of GPU devices in system.
int GetGPUDeviceCount();

//! Get the compute capability of the ith GPU (format: major * 10 + minor)
int GetGPUComputeCapability(int id);

//! Get the runtime version of the ith GPU
int GetGPURuntimeVersion(int id);

//! Get the driver version of the ith GPU
int GetGPUDriverVersion(int id);

//! Whether the current device supports TensorCore
bool TensorCoreAvailable();

//! Get the MultiProcessors of the ith GPU.
int GetGPUMultiProcessors(int id);

//! Get the MaxThreads of each MultiProcessor of the ith GPU.
int GetGPUMaxThreadsPerMultiProcessor(int id);

//! Get the MaxThreads of each block of the ith GPU.
int GetGPUMaxThreadsPerBlock(int id);

//! Get the current GPU device id in system.
int GetCurrentDeviceId();

//! Get the maximum GridDim size for GPU buddy allocator.
std::array<int, 3> GetGpuMaxGridDimSize(int);

//! Get a list of device ids from the environment variable, or use all devices.
std::vector<int> GetSelectedDevices();

//! Get the properties of the ith GPU device.
const gpuDeviceProp &GetDeviceProperties(int id);
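
// Illustrative usage sketch (not part of the original header): enumerate the
// selected devices and print their basic capabilities. Everything called here
// is declared in this header; std::cout is used only for the example.
//
//   for (int dev_id : paddle::platform::GetSelectedDevices()) {
//     std::cout << "GPU " << dev_id << ": compute capability "
//               << paddle::platform::GetGPUComputeCapability(dev_id)
//               << ", " << paddle::platform::GetGPUMultiProcessors(dev_id)
//               << " SMs" << std::endl;
//   }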

//! Set the GPU device id for next execution.
void SetDeviceId(int device_id);

//! Get the memory usage of current GPU device.
void GpuMemoryUsage(size_t *available, size_t *total);
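
// Illustrative usage sketch (not part of the original header): query how much
// memory is free on the current device before choosing an allocation size.
//
//   size_t available = 0, total = 0;
//   paddle::platform::GpuMemoryUsage(&available, &total);
//   // `available` and `total` are reported in bytes for the current device.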

//! Get the available memory to allocate, i.e., the available GPU memory
//! minus the reserved size.
size_t GpuAvailableMemToAlloc();

//! Get the maximum allocation size of current GPU device.
size_t GpuMaxAllocSize();

//! Get the initial allocation size of current GPU device.
size_t GpuInitAllocSize();

//! Get the re-allocation size of current GPU device.
size_t GpuReallocSize();

//! Get the minimum chunk size for GPU buddy allocator.
size_t GpuMinChunkSize();

//! Get the maximum chunk size for GPU buddy allocator.
size_t GpuMaxChunkSize();

//! Copy memory from address src to dst asynchronously.
void GpuMemcpyAsync(void *dst, const void *src, size_t count,
                    gpuMemcpyKind kind, gpuStream_t stream);

//! Copy memory from address src to dst synchronously.
void GpuMemcpySync(void *dst, const void *src, size_t count,
                   gpuMemcpyKind kind);

//! Copy memory from one device to another device asynchronously.
void GpuMemcpyPeerAsync(void *dst, int dst_device, const void *src,
                        int src_device, size_t count, gpuStream_t stream);

//! Copy memory from one device to another device synchronously.
void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
                       int src_device, size_t count);
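
// Illustrative usage sketch (not part of the original header): asynchronously
// copy host data to a device buffer and wait on the stream. `dev_ptr` and
// `stream` are assumed to come from the caller's allocator and stream pool;
// gpuMemcpyHostToDevice is assumed to be the host-to-device copy kind aliased
// by gpu_types.h for CUDA/HIP.
//
//   std::vector<float> host(1024, 1.0f);
//   paddle::platform::GpuMemcpyAsync(dev_ptr, host.data(),
//                                    host.size() * sizeof(float),
//                                    gpuMemcpyHostToDevice, stream);
//   paddle::platform::GpuStreamSync(stream);  // block until the copy finishes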

//! Asynchronously set count bytes of memory at dst to value.
void GpuMemsetAsync(void *dst, int value, size_t count, gpuStream_t stream);

//! Blocks until stream has completed all operations.
void GpuStreamSync(gpuStream_t stream);

//! Destroy the given GPU stream.
void GpuDestroyStream(gpuStream_t stream);

//! Blocks until device has completed all operations.
void GpuDeviceSync();

//! CudaMalloc with recorded info
gpuError_t RecordedGpuMalloc(void **ptr, size_t size, int dev_id,
                             bool malloc_managed_memory = false);

//! CudaFree with recorded info
void RecordedGpuFree(void *p, size_t size, int dev_id);
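
// Illustrative usage sketch (not part of the original header): pair
// RecordedGpuMalloc with RecordedGpuFree of the same size and device so the
// per-device accounting stays balanced. gpuSuccess is assumed to be the
// success constant aliased by gpu_types.h for CUDA/HIP.
//
//   void *ptr = nullptr;
//   const size_t bytes = size_t(1) << 20;  // 1 MiB
//   const int dev_id = paddle::platform::GetCurrentDeviceId();
//   if (paddle::platform::RecordedGpuMalloc(&ptr, bytes, dev_id) == gpuSuccess) {
//     // ... use ptr on device dev_id ...
//     paddle::platform::RecordedGpuFree(ptr, bytes, dev_id);
//   }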

//! Return the last error reported by the GPU runtime.
gpuError_t GpuGetLastError();

#ifdef PADDLE_WITH_CUDA
#if CUDA_VERSION >= 10020
//! cuMemCreate with recorded info
CUresult RecordedGpuMemCreate(CUmemGenericAllocationHandle *handle, size_t size,
                              const CUmemAllocationProp *prop,
                              unsigned long long flags, int dev_id);  // NOLINT

//! cuMemRelease with recorded info
CUresult RecordedGpuMemRelease(CUmemGenericAllocationHandle handle, size_t size,
                               int dev_id);
#endif
#endif

//! Get the available and total GPU memory, considering the configured limit.
bool RecordedGpuMemGetInfo(size_t *avail, size_t *total, size_t *actual_avail,
                           size_t *actual_total, int dev_id);

//! Get recorded cudaMalloc size. If record is disabled, return 0.
uint64_t RecordedGpuMallocSize(int dev_id);

uint64_t RecordedGpuLimitSize(int dev_id);

bool IsGpuMallocRecorded(int dev_id);

//! Empty idle cached memory held by the allocator.
void EmptyCache(void);

bool IsGPUManagedMemorySupported(int dev_id);

bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id);

//! Get the primitive pointer returned by cudaMalloc. Implemented only for
//! testing; do not use in release code.
void *GetGpuBasePtr(void *ptr, int dev_id);

}  // namespace platform
}  // namespace paddle

#endif