cuda_info.cc 11.2 KB
Newer Older
W
Wilber 已提交
1
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 3 4 5 6 7 8 9 10 11 12 13 14
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15
#include "paddle/phi/backends/gpu/gpu_info.h"
W
Wilber 已提交
16

17
#include "paddle/phi/core/enforce.h"
18 19 20

static std::once_flag g_device_props_size_init_flag;
static std::vector<std::unique_ptr<std::once_flag>> g_device_props_init_flags;
21
static std::vector<phi::gpuDeviceProp> g_device_props;
W
Wilber 已提交
22

23
namespace phi {
W
Wilber 已提交
24 25
namespace backends {
namespace gpu {
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75

int DnnVersion() {
  if (!dynload::HasCUDNN()) return -1;
  return dynload::cudnnGetVersion();
}

static int GetGPUDeviceCountImpl() {
  int driverVersion = 0;
  cudaError_t status = cudaDriverGetVersion(&driverVersion);

  if (!(status == gpuSuccess && driverVersion != 0)) {
    // No GPU driver
    VLOG(2) << "GPU Driver Version can't be detected. No GPU driver!";
    return 0;
  }

  const auto *cuda_visible_devices = std::getenv("CUDA_VISIBLE_DEVICES");

  if (cuda_visible_devices != nullptr) {
    std::string cuda_visible_devices_str(cuda_visible_devices);
    if (!cuda_visible_devices_str.empty()) {
      cuda_visible_devices_str.erase(
          0, cuda_visible_devices_str.find_first_not_of('\''));
      cuda_visible_devices_str.erase(
          cuda_visible_devices_str.find_last_not_of('\'') + 1);
      cuda_visible_devices_str.erase(
          0, cuda_visible_devices_str.find_first_not_of('\"'));
      cuda_visible_devices_str.erase(
          cuda_visible_devices_str.find_last_not_of('\"') + 1);
    }
    if (std::all_of(cuda_visible_devices_str.begin(),
                    cuda_visible_devices_str.end(),
                    [](char ch) { return ch == ' '; })) {
      VLOG(2) << "CUDA_VISIBLE_DEVICES is set to be "
                 "empty. No GPU detected.";
      return 0;
    }
  }
  int count;
  PADDLE_ENFORCE_GPU_SUCCESS(cudaGetDeviceCount(&count));
  return count;
}

int GetGPUDeviceCount() {
  // cache the count
  static auto dev_cnt = GetGPUDeviceCountImpl();
  return dev_cnt;
}

int GetGPUComputeCapability(int id) {
76 77 78 79 80 81 82
  PADDLE_ENFORCE_LT(
      id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   id,
                                   GetGPUDeviceCount()));
83 84 85 86 87 88 89 90 91 92 93 94
  int major, minor;
  auto major_error_code =
      cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, id);
  auto minor_error_code =
      cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, id);

  PADDLE_ENFORCE_GPU_SUCCESS(major_error_code);
  PADDLE_ENFORCE_GPU_SUCCESS(minor_error_code);
  return major * 10 + minor;
}

int GetGPURuntimeVersion(int id) {
95 96 97 98 99 100 101
  PADDLE_ENFORCE_LT(
      id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   id,
                                   GetGPUDeviceCount()));
102 103 104 105 106 107
  int runtime_version = 0;
  PADDLE_ENFORCE_GPU_SUCCESS(cudaRuntimeGetVersion(&runtime_version));
  return runtime_version;
}

int GetGPUDriverVersion(int id) {
108 109 110 111 112 113 114
  PADDLE_ENFORCE_LT(
      id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   id,
                                   GetGPUDeviceCount()));
115 116 117 118 119 120 121 122 123 124 125 126
  int driver_version = 0;
  PADDLE_ENFORCE_GPU_SUCCESS(cudaDriverGetVersion(&driver_version));
  return driver_version;
}

bool TensorCoreAvailable() {
  int device = GetCurrentDeviceId();
  int driver_version = GetGPUComputeCapability(device);
  return driver_version >= 70;
}

int GetGPUMultiProcessors(int id) {
127 128 129 130 131 132 133
  PADDLE_ENFORCE_LT(
      id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   id,
                                   GetGPUDeviceCount()));
134 135 136 137 138 139 140
  int count;
  PADDLE_ENFORCE_GPU_SUCCESS(
      cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id));
  return count;
}

int GetGPUMaxThreadsPerMultiProcessor(int id) {
141 142 143 144 145 146 147
  PADDLE_ENFORCE_LT(
      id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   id,
                                   GetGPUDeviceCount()));
148 149 150 151 152 153 154 155
  int count;
  PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceGetAttribute(
      &count, cudaDevAttrMaxThreadsPerMultiProcessor, id));

  return count;
}

int GetGPUMaxThreadsPerBlock(int id) {
156 157 158 159 160 161 162
  PADDLE_ENFORCE_LT(
      id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   id,
                                   GetGPUDeviceCount()));
163 164 165 166 167 168 169 170 171 172 173 174
  int count;
  PADDLE_ENFORCE_GPU_SUCCESS(
      cudaDeviceGetAttribute(&count, cudaDevAttrMaxThreadsPerBlock, id));
  return count;
}

int GetCurrentDeviceId() {
  int device_id;
  PADDLE_ENFORCE_GPU_SUCCESS(cudaGetDevice(&device_id));
  return device_id;
}

W
Wilber 已提交
175
std::array<int, 3> GetGpuMaxGridDimSize(int id) {
176 177 178 179 180 181 182
  PADDLE_ENFORCE_LT(
      id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   id,
                                   GetGPUDeviceCount()));
W
Wilber 已提交
183
  std::array<int, 3> ret;
184 185 186
  int size;
  auto error_code_x = cudaDeviceGetAttribute(&size, cudaDevAttrMaxGridDimX, id);
  PADDLE_ENFORCE_GPU_SUCCESS(error_code_x);
W
Wilber 已提交
187
  ret[0] = size;
188 189 190

  auto error_code_y = cudaDeviceGetAttribute(&size, cudaDevAttrMaxGridDimY, id);
  PADDLE_ENFORCE_GPU_SUCCESS(error_code_y);
W
Wilber 已提交
191
  ret[1] = size;
192 193 194

  auto error_code_z = cudaDeviceGetAttribute(&size, cudaDevAttrMaxGridDimZ, id);
  PADDLE_ENFORCE_GPU_SUCCESS(error_code_z);
W
Wilber 已提交
195
  ret[2] = size;
196 197 198
  return ret;
}

199 200 201 202 203 204 205
std::pair<int, int> GetGpuStreamPriorityRange() {
  int least_priority, greatest_priority;
  PADDLE_ENFORCE_GPU_SUCCESS(
      cudaDeviceGetStreamPriorityRange(&least_priority, &greatest_priority));
  return std::make_pair(least_priority, greatest_priority);
}

206 207 208
const gpuDeviceProp &GetDeviceProperties(int id) {
  std::call_once(g_device_props_size_init_flag, [&] {
    int gpu_num = 0;
W
Wilber 已提交
209
    gpu_num = GetGPUDeviceCount();
210 211 212 213 214 215 216 217
    g_device_props_init_flags.resize(gpu_num);
    g_device_props.resize(gpu_num);
    for (int i = 0; i < gpu_num; ++i) {
      g_device_props_init_flags[i] = std::make_unique<std::once_flag>();
    }
  });

  if (id == -1) {
W
Wilber 已提交
218
    id = GetCurrentDeviceId();
219 220 221
  }

  if (id < 0 || id >= static_cast<int>(g_device_props.size())) {
222
    PADDLE_THROW(phi::errors::OutOfRange(
223 224 225 226
        "The device id %d is out of range [0, %d), where %d is the number of "
        "devices on this machine. Because the device id should be greater than "
        "or equal to zero and smaller than the number of gpus. Please input "
        "appropriate device again!",
W
Wilber 已提交
227 228
        id,
        static_cast<int>(g_device_props.size()),
229 230 231 232 233 234 235 236 237 238 239 240 241
        static_cast<int>(g_device_props.size())));
  }

  std::call_once(*(g_device_props_init_flags[id]), [&] {
    PADDLE_ENFORCE_GPU_SUCCESS(
        cudaGetDeviceProperties(&g_device_props[id], id));
  });

  return g_device_props[id];
}

void SetDeviceId(int id) {
  // TODO(qijun): find a better way to cache the cuda device count
242 243 244 245 246 247 248
  PADDLE_ENFORCE_LT(
      id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   id,
                                   GetGPUDeviceCount()));
249
  PADDLE_RETRY_CUDA_SUCCESS(cudaSetDevice(id));
250
  VLOG(4) << "SetDeviceId " << id;
251 252
}

W
Wilber 已提交
253 254 255 256 257
void GpuMemcpyAsync(void *dst,
                    const void *src,
                    size_t count,
                    gpuMemcpyKind kind,
                    gpuStream_t stream) {
258 259 260
  PADDLE_ENFORCE_GPU_SUCCESS(cudaMemcpyAsync(dst, src, count, kind, stream));
}

W
Wilber 已提交
261 262 263
void GpuMemcpySync(void *dst,
                   const void *src,
                   size_t count,
264 265 266 267
                   gpuMemcpyKind kind) {
  PADDLE_ENFORCE_GPU_SUCCESS(cudaMemcpy(dst, src, count, kind));
}

W
Wilber 已提交
268 269 270 271 272 273
void GpuMemcpyPeerAsync(void *dst,
                        int dst_device,
                        const void *src,
                        int src_device,
                        size_t count,
                        gpuStream_t stream) {
274 275 276 277
  PADDLE_ENFORCE_GPU_SUCCESS(
      cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream));
}

W
Wilber 已提交
278 279
void GpuMemcpyPeerSync(
    void *dst, int dst_device, const void *src, int src_device, size_t count) {
280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298
  PADDLE_ENFORCE_GPU_SUCCESS(
      cudaMemcpyPeer(dst, dst_device, src, src_device, count));
}

void GpuMemsetAsync(void *dst, int value, size_t count, gpuStream_t stream) {
  PADDLE_ENFORCE_GPU_SUCCESS(cudaMemsetAsync(dst, value, count, stream));
}

void GpuStreamSync(gpuStream_t stream) {
  PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamSynchronize(stream));
}

void GpuDestroyStream(gpuStream_t stream) {
  PADDLE_ENFORCE_GPU_SUCCESS(cudaStreamDestroy(stream));
}

void GpuDeviceSync() { PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceSynchronize()); }

gpuError_t GpuGetLastError() { return cudaGetLastError(); }
W
Wilber 已提交
299

300 301 302 303
// See
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements
// for more detail about managed memory requirements
bool IsGPUManagedMemorySupported(int dev_id) {
304 305 306 307 308 309 310
  PADDLE_ENFORCE_LT(
      dev_id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   dev_id,
                                   GetGPUDeviceCount()));
311 312 313 314 315 316 317 318 319 320 321
#if defined(__linux__) || defined(_WIN32)
  int ManagedMemoryAttr;
  PADDLE_ENFORCE_GPU_SUCCESS(cudaDeviceGetAttribute(
      &ManagedMemoryAttr, cudaDevAttrManagedMemory, dev_id));
  return ManagedMemoryAttr != 0;
#else
  return false;
#endif
}

bool IsGPUManagedMemoryOversubscriptionSupported(int dev_id) {
322 323 324 325 326 327 328
  PADDLE_ENFORCE_LT(
      dev_id,
      GetGPUDeviceCount(),
      phi::errors::InvalidArgument("Device id must be less than GPU count, "
                                   "but received id is: %d. GPU count is: %d.",
                                   dev_id,
                                   GetGPUDeviceCount()));
329 330 331 332 333 334 335 336
#ifdef __linux__
  return IsGPUManagedMemorySupported(dev_id) &&
         GetGPUComputeCapability(dev_id) >= 60;
#else
  return false;
#endif
}

W
Wilber 已提交
337 338
}  // namespace gpu
}  // namespace backends
339
}  // namespace phi