Require cuDNN >= 7 and collapse the per-release cuDNN dynload macro groups.

cmake/configure.cmake: when WITH_GPU is on, stop at configure time if
CUDNN_MAJOR_VERSION is below 7. The variable is handed to if() by name
rather than as ${CUDNN_MAJOR_VERSION}, so an unset or empty value still
reaches the FATAL_ERROR message instead of leaving if() without a left
operand.

paddle/fluid/platform/dynload/cudnn.h, cudnn.cc: the R2, AFTER_R3, AFTER_R4,
R5 and R6 routine lists are folded into CUDNN_DNN_ROUTINE_EACH, and
AFTER_R3_LESS_R8 becomes AFTER_R7_LESS_R8, guarded by
CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000.

diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index 851520328f3ce74969dfc92ef73f1ba17ce482d3..2a1e6897c02e445b799815b0fdc498774e1f37ad 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -100,6 +100,11 @@ if(WITH_GPU)
     if(NOT CUDNN_FOUND)
         message(FATAL_ERROR "Paddle needs cudnn to compile")
     endif()
+
+    if(CUDNN_MAJOR_VERSION VERSION_LESS 7)
+        message(FATAL_ERROR "Paddle needs CUDNN >= 7.0 to compile")
+    endif()
+
     if(CUPTI_FOUND)
         include_directories(${CUPTI_INCLUDE_DIR})
         add_definitions(-DPADDLE_WITH_CUPTI)
diff --git a/paddle/fluid/platform/dynload/cudnn.cc b/paddle/fluid/platform/dynload/cudnn.cc
index 4c59fe5e9bae4b751d87b0d2feb1ea0bd02bcf1d..366762401c741e570204f5c9d146343a3d60aa33 100644
--- a/paddle/fluid/platform/dynload/cudnn.cc
+++ b/paddle/fluid/platform/dynload/cudnn.cc
@@ -24,26 +24,9 @@ void* cudnn_dso_handle = nullptr;
 #define DEFINE_WRAP(__name) DynLoad__##__name __name
 
 CUDNN_DNN_ROUTINE_EACH(DEFINE_WRAP);
-CUDNN_DNN_ROUTINE_EACH_R2(DEFINE_WRAP);
 
-#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R3
-CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DEFINE_WRAP);
-#endif
-
-#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8
-CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(DEFINE_WRAP);
-#endif
-
-#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R4
-CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP);
-#endif
-
-#ifdef CUDNN_DNN_ROUTINE_EACH_R5
-CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP);
-#endif
-
-#ifdef CUDNN_DNN_ROUTINE_EACH_R6
-CUDNN_DNN_ROUTINE_EACH_R6(DEFINE_WRAP);
+#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8
+CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DEFINE_WRAP);
 #endif
 
 #ifdef CUDNN_DNN_ROUTINE_EACH_R7
diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h
index db84b8731f9ca467c4521221a3dbe0b1fc61b597..f5045ff004ee9b8391a879c402c679f0078487a2 100644
--- a/paddle/fluid/platform/dynload/cudnn.h
+++ b/paddle/fluid/platform/dynload/cudnn.h
@@ -48,121 +48,93 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
  * include all needed cudnn functions in HPPL
  * different cudnn version has different interfaces
  **/
-#define CUDNN_DNN_ROUTINE_EACH(__macro)                   \
-  __macro(cudnnSetTensor4dDescriptor);                    \
-  __macro(cudnnSetTensor4dDescriptorEx);                  \
-  __macro(cudnnSetTensorNdDescriptor);                    \
-  __macro(cudnnGetTensorNdDescriptor);                    \
-  __macro(cudnnGetConvolutionNdForwardOutputDim);         \
-  __macro(cudnnCreateTensorDescriptor);                   \
-  __macro(cudnnDestroyTensorDescriptor);                  \
-  __macro(cudnnCreateFilterDescriptor);                   \
-  __macro(cudnnSetFilter4dDescriptor);                    \
-  __macro(cudnnSetFilterNdDescriptor);                    \
-  __macro(cudnnGetFilterNdDescriptor);                    \
-  __macro(cudnnSetPooling2dDescriptor);                   \
-  __macro(cudnnSetPoolingNdDescriptor);                   \
-  __macro(cudnnGetPoolingNdDescriptor);                   \
-  __macro(cudnnDestroyFilterDescriptor);                  \
-  __macro(cudnnCreateConvolutionDescriptor);              \
-  __macro(cudnnCreatePoolingDescriptor);                  \
-  __macro(cudnnDestroyPoolingDescriptor);                 \
-  __macro(cudnnSetConvolution2dDescriptor);               \
-  __macro(cudnnDestroyConvolutionDescriptor);             \
-  __macro(cudnnSetConvolutionNdDescriptor);               \
-  __macro(cudnnGetConvolutionNdDescriptor);               \
-  __macro(cudnnDeriveBNTensorDescriptor);                 \
-  __macro(cudnnCreateSpatialTransformerDescriptor);       \
-  __macro(cudnnSetSpatialTransformerNdDescriptor);        \
-  __macro(cudnnDestroySpatialTransformerDescriptor);      \
-  __macro(cudnnSpatialTfGridGeneratorForward);            \
-  __macro(cudnnSpatialTfGridGeneratorBackward);           \
-  __macro(cudnnSpatialTfSamplerForward);                  \
-  __macro(cudnnSpatialTfSamplerBackward);                 \
-  __macro(cudnnCreate);                                   \
-  __macro(cudnnDestroy);                                  \
-  __macro(cudnnSetStream);                                \
-  __macro(cudnnActivationForward);                        \
-  __macro(cudnnActivationBackward);                       \
-  __macro(cudnnConvolutionForward);                       \
-  __macro(cudnnConvolutionBackwardBias);                  \
-  __macro(cudnnGetConvolutionForwardWorkspaceSize);       \
-  __macro(cudnnTransformTensor);                          \
-  __macro(cudnnPoolingForward);                           \
-  __macro(cudnnPoolingBackward);                          \
-  __macro(cudnnSoftmaxBackward);                          \
-  __macro(cudnnSoftmaxForward);                           \
-  __macro(cudnnGetVersion);                               \
-  __macro(cudnnFindConvolutionForwardAlgorithmEx);        \
-  __macro(cudnnFindConvolutionBackwardFilterAlgorithmEx); \
-  __macro(cudnnFindConvolutionBackwardFilterAlgorithm);   \
-  __macro(cudnnFindConvolutionBackwardDataAlgorithmEx);   \
-  __macro(cudnnGetErrorString);                           \
-  __macro(cudnnCreateDropoutDescriptor);                  \
-  __macro(cudnnDropoutGetStatesSize);                     \
-  __macro(cudnnSetDropoutDescriptor);                     \
-  __macro(cudnnRestoreDropoutDescriptor);                 \
-  __macro(cudnnCreateRNNDescriptor);                      \
-  __macro(cudnnGetRNNParamsSize);                         \
-  __macro(cudnnGetRNNWorkspaceSize);                      \
-  __macro(cudnnGetRNNTrainingReserveSize);                \
-  __macro(cudnnRNNForwardTraining);                       \
-  __macro(cudnnRNNBackwardData);                          \
-  __macro(cudnnRNNBackwardWeights);                       \
-  __macro(cudnnRNNForwardInference);                      \
-  __macro(cudnnDestroyDropoutDescriptor);                 \
-  __macro(cudnnDestroyRNNDescriptor);                     \
-  __macro(cudnnSetTensorNdDescriptorEx);
-
-CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
-
-#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \
-  __macro(cudnnAddTensor);                 \
-  __macro(cudnnConvolutionBackwardData);   \
-  __macro(cudnnConvolutionBackwardFilter);
-CUDNN_DNN_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
-
-// APIs available after R3:
-#if CUDNN_VERSION >= 3000
-#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3(__macro)           \
+#define CUDNN_DNN_ROUTINE_EACH(__macro)                    \
+  __macro(cudnnSetTensor4dDescriptor);                     \
+  __macro(cudnnSetTensor4dDescriptorEx);                   \
+  __macro(cudnnSetTensorNdDescriptor);                     \
+  __macro(cudnnGetTensorNdDescriptor);                     \
+  __macro(cudnnGetConvolutionNdForwardOutputDim);          \
+  __macro(cudnnCreateTensorDescriptor);                    \
+  __macro(cudnnDestroyTensorDescriptor);                   \
+  __macro(cudnnCreateFilterDescriptor);                    \
+  __macro(cudnnSetFilter4dDescriptor);                     \
+  __macro(cudnnSetFilterNdDescriptor);                     \
+  __macro(cudnnGetFilterNdDescriptor);                     \
+  __macro(cudnnSetPooling2dDescriptor);                    \
+  __macro(cudnnSetPoolingNdDescriptor);                    \
+  __macro(cudnnGetPoolingNdDescriptor);                    \
+  __macro(cudnnDestroyFilterDescriptor);                   \
+  __macro(cudnnCreateConvolutionDescriptor);               \
+  __macro(cudnnCreatePoolingDescriptor);                   \
+  __macro(cudnnDestroyPoolingDescriptor);                  \
+  __macro(cudnnSetConvolution2dDescriptor);                \
+  __macro(cudnnDestroyConvolutionDescriptor);              \
+  __macro(cudnnSetConvolutionNdDescriptor);                \
+  __macro(cudnnGetConvolutionNdDescriptor);                \
+  __macro(cudnnDeriveBNTensorDescriptor);                  \
+  __macro(cudnnCreateSpatialTransformerDescriptor);        \
+  __macro(cudnnSetSpatialTransformerNdDescriptor);         \
+  __macro(cudnnDestroySpatialTransformerDescriptor);       \
+  __macro(cudnnSpatialTfGridGeneratorForward);             \
+  __macro(cudnnSpatialTfGridGeneratorBackward);            \
+  __macro(cudnnSpatialTfSamplerForward);                   \
+  __macro(cudnnSpatialTfSamplerBackward);                  \
+  __macro(cudnnCreate);                                    \
+  __macro(cudnnDestroy);                                   \
+  __macro(cudnnSetStream);                                 \
+  __macro(cudnnActivationForward);                         \
+  __macro(cudnnActivationBackward);                        \
+  __macro(cudnnConvolutionForward);                        \
+  __macro(cudnnConvolutionBackwardBias);                   \
+  __macro(cudnnGetConvolutionForwardWorkspaceSize);        \
+  __macro(cudnnTransformTensor);                           \
+  __macro(cudnnPoolingForward);                            \
+  __macro(cudnnPoolingBackward);                           \
+  __macro(cudnnSoftmaxBackward);                           \
+  __macro(cudnnSoftmaxForward);                            \
+  __macro(cudnnGetVersion);                                \
+  __macro(cudnnFindConvolutionForwardAlgorithmEx);         \
+  __macro(cudnnFindConvolutionBackwardFilterAlgorithmEx);  \
+  __macro(cudnnFindConvolutionBackwardFilterAlgorithm);    \
+  __macro(cudnnFindConvolutionBackwardDataAlgorithmEx);    \
+  __macro(cudnnGetErrorString);                            \
+  __macro(cudnnCreateDropoutDescriptor);                   \
+  __macro(cudnnDropoutGetStatesSize);                      \
+  __macro(cudnnSetDropoutDescriptor);                      \
+  __macro(cudnnRestoreDropoutDescriptor);                  \
+  __macro(cudnnCreateRNNDescriptor);                       \
+  __macro(cudnnGetRNNParamsSize);                          \
+  __macro(cudnnGetRNNWorkspaceSize);                       \
+  __macro(cudnnGetRNNTrainingReserveSize);                 \
+  __macro(cudnnRNNForwardTraining);                        \
+  __macro(cudnnRNNBackwardData);                           \
+  __macro(cudnnRNNBackwardWeights);                        \
+  __macro(cudnnRNNForwardInference);                       \
+  __macro(cudnnDestroyDropoutDescriptor);                  \
+  __macro(cudnnDestroyRNNDescriptor);                      \
+  __macro(cudnnSetTensorNdDescriptorEx);                   \
+  __macro(cudnnAddTensor);                                 \
+  __macro(cudnnConvolutionBackwardData);                   \
+  __macro(cudnnConvolutionBackwardFilter);                 \
   __macro(cudnnGetConvolutionBackwardFilterWorkspaceSize); \
-  __macro(cudnnGetConvolutionBackwardDataWorkspaceSize);
-CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
-#endif
+  __macro(cudnnGetConvolutionBackwardDataWorkspaceSize);   \
+  __macro(cudnnBatchNormalizationForwardTraining);         \
+  __macro(cudnnBatchNormalizationForwardInference);        \
+  __macro(cudnnBatchNormalizationBackward);                \
+  __macro(cudnnCreateActivationDescriptor);                \
+  __macro(cudnnSetActivationDescriptor);                   \
+  __macro(cudnnGetActivationDescriptor);                   \
+  __macro(cudnnDestroyActivationDescriptor);               \
+  __macro(cudnnSetRNNDescriptor_v6);
+CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
 
-// APIs available after R3:
-#if CUDNN_VERSION >= 3000 && CUDNN_VERSION < 8000
-#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(__macro) \
+#if CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000
+#define CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(__macro) \
   __macro(cudnnGetConvolutionBackwardFilterAlgorithm);   \
   __macro(cudnnGetConvolutionForwardAlgorithm);          \
   __macro(cudnnGetConvolutionBackwardDataAlgorithm);     \
   __macro(cudnnSetRNNDescriptor);
-CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
-#endif
-
-// APIs available after R4:
-#if CUDNN_VERSION >= 4007
-#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro)    \
-  __macro(cudnnBatchNormalizationForwardTraining);  \
-  __macro(cudnnBatchNormalizationForwardInference); \
-  __macro(cudnnBatchNormalizationBackward);
-CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
-#endif
-
-// APIs in R5
-#if CUDNN_VERSION >= 5000
-#define CUDNN_DNN_ROUTINE_EACH_R5(__macro)  \
-  __macro(cudnnCreateActivationDescriptor); \
-  __macro(cudnnSetActivationDescriptor);    \
-  __macro(cudnnGetActivationDescriptor);    \
-  __macro(cudnnDestroyActivationDescriptor);
-CUDNN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
-#endif
-
-// APIs in R6
-#if CUDNN_VERSION >= 6000
-#define CUDNN_DNN_ROUTINE_EACH_R6(__macro) __macro(cudnnSetRNNDescriptor_v6);
-CUDNN_DNN_ROUTINE_EACH_R6(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
+CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
 #endif
 
 #if CUDNN_VERSION >= 7001