提交 772e0956 编写于 作者: Z zhouwei25 提交者: liuwei1031

Optimize the content of error reporting information, print error code and...

Optimize the content of error reporting information, print error code and official document web sites (#18671)

 Optimize the error-reporting information of the CUDA-related APIs.
 index on develop: 130ac177 Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into develop
上级 ae58afc5
......@@ -79,6 +79,12 @@ DEFINE_string(selected_gpus, "",
namespace paddle {
namespace platform {
// Returns a fixed help string pointing readers at the official CUDA
// runtime error-code reference; it is appended to CUDA failure messages
// raised elsewhere in this file.
inline std::string CudaErrorWebsite() {
  static const char kCudaErrorDoc[] =
      "Please see detail in https://docs.nvidia.com/cuda/cuda-runtime-api"
      "/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3f51e3575c217824"
      "6db0a94a430e0038";
  return std::string(kCudaErrorDoc);
}
static int GetCUDADeviceCountImpl() {
const auto *cuda_visible_devices = std::getenv("CUDA_VISIBLE_DEVICES");
if (cuda_visible_devices != nullptr) {
......@@ -92,9 +98,12 @@ static int GetCUDADeviceCountImpl() {
}
int count;
auto error_code = cudaGetDeviceCount(&count);
PADDLE_ENFORCE(
cudaGetDeviceCount(&count),
"cudaGetDeviceCount failed in paddle::platform::GetCUDADeviceCount");
error_code,
"cudaGetDeviceCount failed in "
"paddle::platform::GetCUDADeviceCountImpl, error code : %d, %s",
error_code, CudaErrorWebsite());
return count;
}
......@@ -107,28 +116,33 @@ int GetCUDAComputeCapability(int id) {
PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
cudaDeviceProp device_prop;
auto error_code = cudaGetDeviceProperties(&device_prop, id);
PADDLE_ENFORCE(error_code,
"cudaGetDeviceProperties failed in "
"paddle::platform::GetCUDAComputeCapability, error code : %d",
error_code);
PADDLE_ENFORCE(
error_code,
"cudaGetDeviceProperties failed in "
"paddle::platform::GetCUDAComputeCapability, error code : %d, %s",
error_code, CudaErrorWebsite());
return device_prop.major * 10 + device_prop.minor;
}
// Returns the installed CUDA runtime version as reported by
// cudaRuntimeGetVersion (e.g. 9020 for CUDA 9.2).
// NOTE(review): `id` is only range-checked here; the runtime version is not
// per-device, so the parameter is otherwise unused.
int GetCUDARuntimeVersion(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  int runtime_version = 0;
  auto error_code = cudaRuntimeGetVersion(&runtime_version);
  // PADDLE_ENFORCE on a cudaError_t fails for anything other than success;
  // the message carries the numeric code plus a doc link for diagnosis.
  PADDLE_ENFORCE(error_code,
                 "cudaRuntimeGetVersion failed in "
                 "paddle::platform::GetCUDARuntimeVersion, error code : %d, %s",
                 error_code, CudaErrorWebsite());
  return runtime_version;
}
// Returns the installed CUDA driver version as reported by
// cudaDriverGetVersion.
// NOTE(review): `id` is only range-checked here; the driver version is not
// per-device, so the parameter is otherwise unused.
int GetCUDADriverVersion(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  int driver_version = 0;
  auto error_code = cudaDriverGetVersion(&driver_version);
  PADDLE_ENFORCE(error_code,
                 "cudaDriverGetVersion failed in "
                 "paddle::platform::GetCUDADriverVersion, error code : %d, %s",
                 error_code, CudaErrorWebsite());
  return driver_version;
}
......@@ -145,28 +159,35 @@ bool TensorCoreAvailable() {
// Returns the number of streaming multiprocessors on device `id`
// (cudaDevAttrMultiProcessorCount).
int GetCUDAMultiProcessors(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  int count;
  auto error_code =
      cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id);
  // Fixed: the message previously said "GetCUDAMultiProcess", which is not
  // the name of this function.
  PADDLE_ENFORCE(error_code,
                 "cudaDeviceGetAttribute failed in "
                 "paddle::platform::GetCUDAMultiProcessors, error code : %d, %s",
                 error_code, CudaErrorWebsite());
  return count;
}
// Returns the maximum resident threads per multiprocessor for device `id`
// (cudaDevAttrMaxThreadsPerMultiProcessor).
int GetCUDAMaxThreadsPerMultiProcessor(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  int count;
  auto error_code = cudaDeviceGetAttribute(
      &count, cudaDevAttrMaxThreadsPerMultiProcessor, id);
  PADDLE_ENFORCE(
      error_code,
      "cudaDeviceGetAttribute failed in paddle::"
      "platform::GetCUDAMaxThreadsPerMultiProcessor, error code : %d, %s",
      error_code, CudaErrorWebsite());
  return count;
}
// Returns the device currently selected for the calling host thread
// (cudaGetDevice).
int GetCurrentDeviceId() {
  int device_id;
  auto error_code = cudaGetDevice(&device_id);
  PADDLE_ENFORCE(error_code,
                 "cudaGetDevice failed in "
                 "paddle::platform::GetCurrentDeviceId, error code : %d, %s",
                 error_code, CudaErrorWebsite());
  return device_id;
}
......@@ -191,13 +212,19 @@ std::vector<int> GetSelectedDevices() {
// Selects device `id` for subsequent CUDA calls on the calling host thread.
void SetDeviceId(int id) {
  // TODO(qijun): find a better way to cache the cuda device count
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  auto error_code = cudaSetDevice(id);
  // Fixed: the message previously said "SetDeviced" — a typo for this
  // function's actual name.
  PADDLE_ENFORCE(error_code,
                 "cudaSetDevice failed in "
                 "paddle::platform::SetDeviceId, error code : %d, %s",
                 error_code, CudaErrorWebsite());
}
// Queries free and total memory (in bytes) of the current device via
// cudaMemGetInfo, writing the results through `available` and `total`.
void GpuMemoryUsage(size_t *available, size_t *total) {
  auto error_code = cudaMemGetInfo(available, total);
  // Fixed: the message previously said "GetMemoryUsage", which is not the
  // name of this function.
  PADDLE_ENFORCE(error_code,
                 "cudaMemGetInfo failed in "
                 "paddle::platform::GpuMemoryUsage, error code : %d, %s",
                 error_code, CudaErrorWebsite());
}
size_t GpuMaxAllocSize() {
......@@ -224,11 +251,13 @@ size_t GpuInitAllocSize() {
size_t GpuReallocSize() {
if (FLAGS_reallocate_gpu_memory_in_mb > 0ul) {
// Additional memory will be allocated by FLAGS_reallocate_gpu_memory_in_mb
// Additional memory will be allocated by
// FLAGS_reallocate_gpu_memory_in_mb
return static_cast<size_t>(FLAGS_reallocate_gpu_memory_in_mb << 20);
}
// FLAGS_reallocate_gpu_memory_in_mb is 0, additional memory will be allocated
// FLAGS_reallocate_gpu_memory_in_mb is 0, additional memory will be
// allocated
// by fraction
size_t total = 0;
size_t available = 0;
......@@ -268,37 +297,50 @@ size_t GpuMaxChunkSize() {
// Asynchronous memcpy of `count` bytes from `src` to `dst` on `stream`.
// `kind` selects the copy direction (H2D/D2H/D2D/...).
// NOTE(review): for host memory the async copy only truly overlaps when the
// host buffer is pinned — not checked here.
void GpuMemcpyAsync(void *dst, const void *src, size_t count,
                    enum cudaMemcpyKind kind, cudaStream_t stream) {
  auto error_code = cudaMemcpyAsync(dst, src, count, kind, stream);
  PADDLE_ENFORCE(error_code,
                 "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync "
                 "(%p -> %p, length: %d) error code : %d, %s",
                 src, dst, static_cast<int>(count), error_code,
                 CudaErrorWebsite());
}
// Blocking memcpy of `count` bytes from `src` to `dst`; `kind` selects the
// copy direction (H2D/D2H/D2D/...).
void GpuMemcpySync(void *dst, const void *src, size_t count,
                   enum cudaMemcpyKind kind) {
  auto error_code = cudaMemcpy(dst, src, count, kind);
  PADDLE_ENFORCE(error_code,
                 "cudaMemcpy failed in paddle::platform::GpuMemcpySync "
                 "(%p -> %p, length: %d) error code : %d, %s",
                 src, dst, static_cast<int>(count), error_code,
                 CudaErrorWebsite());
}
// Asynchronous device-to-device copy of `count` bytes from `src` on
// `src_device` to `dst` on `dst_device`, enqueued on `stream`.
void GpuMemcpyPeerAsync(void *dst, int dst_device, const void *src,
                        int src_device, size_t count, cudaStream_t stream) {
  auto error_code =
      cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream);
  PADDLE_ENFORCE(
      error_code,
      "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync "
      "error code : %d, %s",
      error_code, CudaErrorWebsite());
}
// Blocking device-to-device copy of `count` bytes from `src` on `src_device`
// to `dst` on `dst_device`.
void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
                       int src_device, size_t count) {
  auto error_code = cudaMemcpyPeer(dst, dst_device, src, src_device, count);
  PADDLE_ENFORCE(error_code,
                 "cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync "
                 "error code : %d, %s",
                 error_code, CudaErrorWebsite());
}
// Asynchronously sets `count` bytes at `dst` to the byte value of `value`
// on `stream` (byte-wise fill, like memset — not an element-wise fill).
void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
  auto error_code = cudaMemsetAsync(dst, value, count, stream);
  PADDLE_ENFORCE(error_code,
                 "cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync "
                 "error code : %d, %s",
                 error_code, CudaErrorWebsite());
}
} // namespace platform
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册