// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include "lite/utils/cp_logging.h" /* * This file contains some CUDA specific utils. */ // For quickly implementing the prototype, some of the following code snippets // are borrowed from project MXNet, great thanks for the original developers. #define CHECK_CUDA_ERROR(msg) \ { \ auto e = cudaGetLastError(); \ CHECK_EQ(e, cudaSuccess) << (msg) << " CUDA: " << cudaGetErrorString(e); \ } #define CUDA_CALL(func) \ { \ auto e = (func); \ CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) \ << "CUDA: " << cudaGetErrorString(e); \ } #define CUBLAS_CALL(func) \ { \ auto e = (func); \ CHECK_EQ(e, CUBLAS_STATUS_SUCCESS) \ << "cuBlas: " << paddle::lite::cuda::CublasErrorInfo(e); \ } #define CUDNN_VERSION_MIN(major, minor, patch) \ (CUDNN_VERSION >= (major * 1000 + minor * 100 + patch)) #define CUDNN_CHECK(condition) \ { \ cudnnStatus_t status = condition; \ CHECK_EQ(status, CUDNN_STATUS_SUCCESS) << CudnnGetErrorInfo(status); \ } const int CUDA_NUM_THREADS = 512; // CUDA: number of blocks for threads. inline int CUDA_GET_BLOCKS(const int N) { return (N + CUDA_NUM_THREADS - 1) / CUDA_NUM_THREADS; } inline int CUDA_GET_BLOCKS(const int N, const int base) { return (N + base - 1) / base; } namespace paddle { namespace lite { namespace cuda { static const char* CublasErrorInfo(int error) { switch (error) { #define LITE_CUBLAS_ERROR_INFO(xx) \ case xx: \ return #xx; \ break; LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_NOT_INITIALIZED); LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_ALLOC_FAILED); LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_INVALID_VALUE); LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_ARCH_MISMATCH); LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_MAPPING_ERROR); LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_EXECUTION_FAILED); LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_INTERNAL_ERROR); LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_NOT_SUPPORTED); LITE_CUBLAS_ERROR_INFO(CUBLAS_STATUS_LICENSE_ERROR); #undef LITE_CUBLAS_ERROR_INFO default: return "unknown error"; } } static const char* CudnnGetErrorInfo(cudnnStatus_t status) { switch (status) { case CUDNN_STATUS_SUCCESS: return "CUDNN_STATUS_SUCCESS"; case CUDNN_STATUS_NOT_INITIALIZED: return "CUDNN_STATUS_NOT_INITIALIZED"; case CUDNN_STATUS_ALLOC_FAILED: return "CUDNN_STATUS_ALLOC_FAILED"; case CUDNN_STATUS_BAD_PARAM: return "CUDNN_STATUS_BAD_PARAM"; case CUDNN_STATUS_INTERNAL_ERROR: return "CUDNN_STATUS_INTERNAL_ERROR"; case CUDNN_STATUS_INVALID_VALUE: return "CUDNN_STATUS_INVALID_VALUE"; case CUDNN_STATUS_ARCH_MISMATCH: return "CUDNN_STATUS_ARCH_MISMATCH"; case CUDNN_STATUS_MAPPING_ERROR: return "CUDNN_STATUS_MAPPING_ERROR"; case CUDNN_STATUS_EXECUTION_FAILED: return "CUDNN_STATUS_EXECUTION_FAILED"; case CUDNN_STATUS_NOT_SUPPORTED: return "CUDNN_STATUS_NOT_SUPPORTED"; case CUDNN_STATUS_LICENSE_ERROR: return "CUDNN_STATUS_LICENSE_ERROR"; #if CUDNN_VERSION_MIN(6, 0, 0) case CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING: return "CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING"; #endif #if CUDNN_VERSION_MIN(7, 0, 0) case CUDNN_STATUS_RUNTIME_IN_PROGRESS: return "CUDNN_STATUS_RUNTIME_IN_PROGRESS"; case CUDNN_STATUS_RUNTIME_FP_OVERFLOW: return "CUDNN_STATUS_RUNTIME_FP_OVERFLOW"; #endif } return "Unknown cudnn status"; } } // namespace cuda } // namespace lite } // namespace paddle