未验证 提交 b8b82b72 编写于 作者: T tianshuo78520a 提交者: GitHub

Del cudnn6 code2 (#31986)

上级 a17c3691
...@@ -100,6 +100,11 @@ if(WITH_GPU) ...@@ -100,6 +100,11 @@ if(WITH_GPU)
if(NOT CUDNN_FOUND) if(NOT CUDNN_FOUND)
message(FATAL_ERROR "Paddle needs cudnn to compile") message(FATAL_ERROR "Paddle needs cudnn to compile")
endif() endif()
# Refuse to configure against cuDNN older than 7.
# The variable is referenced by name instead of being expanded with ${...}:
# when it is empty or undefined, `if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)`
# collapses to `if( VERSION_LESS 7)`, which CMake rejects with an argument
# error rather than printing the message below. An empty or undefined version
# is treated as "too old" so the user always sees the same clear diagnostic.
if(NOT CUDNN_MAJOR_VERSION OR CUDNN_MAJOR_VERSION VERSION_LESS 7)
    message(FATAL_ERROR "Paddle needs CUDNN >= 7.0 to compile")
endif()
if(CUPTI_FOUND) if(CUPTI_FOUND)
include_directories(${CUPTI_INCLUDE_DIR}) include_directories(${CUPTI_INCLUDE_DIR})
add_definitions(-DPADDLE_WITH_CUPTI) add_definitions(-DPADDLE_WITH_CUPTI)
......
...@@ -24,26 +24,9 @@ void* cudnn_dso_handle = nullptr; ...@@ -24,26 +24,9 @@ void* cudnn_dso_handle = nullptr;
#define DEFINE_WRAP(__name) DynLoad__##__name __name #define DEFINE_WRAP(__name) DynLoad__##__name __name
CUDNN_DNN_ROUTINE_EACH(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R2(DEFINE_WRAP);
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R3 #ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8
CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8
CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R4
CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R5
CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R6
CUDNN_DNN_ROUTINE_EACH_R6(DEFINE_WRAP);
#endif #endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R7 #ifdef CUDNN_DNN_ROUTINE_EACH_R7
......
...@@ -48,121 +48,93 @@ extern void EnforceCUDNNLoaded(const char* fn_name); ...@@ -48,121 +48,93 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
* include all needed cudnn functions in HPPL * include all needed cudnn functions in HPPL
* different cudnn version has different interfaces * different cudnn version has different interfaces
**/ **/
#define CUDNN_DNN_ROUTINE_EACH(__macro) \ #define CUDNN_DNN_ROUTINE_EACH(__macro) \
__macro(cudnnSetTensor4dDescriptor); \ __macro(cudnnSetTensor4dDescriptor); \
__macro(cudnnSetTensor4dDescriptorEx); \ __macro(cudnnSetTensor4dDescriptorEx); \
__macro(cudnnSetTensorNdDescriptor); \ __macro(cudnnSetTensorNdDescriptor); \
__macro(cudnnGetTensorNdDescriptor); \ __macro(cudnnGetTensorNdDescriptor); \
__macro(cudnnGetConvolutionNdForwardOutputDim); \ __macro(cudnnGetConvolutionNdForwardOutputDim); \
__macro(cudnnCreateTensorDescriptor); \ __macro(cudnnCreateTensorDescriptor); \
__macro(cudnnDestroyTensorDescriptor); \ __macro(cudnnDestroyTensorDescriptor); \
__macro(cudnnCreateFilterDescriptor); \ __macro(cudnnCreateFilterDescriptor); \
__macro(cudnnSetFilter4dDescriptor); \ __macro(cudnnSetFilter4dDescriptor); \
__macro(cudnnSetFilterNdDescriptor); \ __macro(cudnnSetFilterNdDescriptor); \
__macro(cudnnGetFilterNdDescriptor); \ __macro(cudnnGetFilterNdDescriptor); \
__macro(cudnnSetPooling2dDescriptor); \ __macro(cudnnSetPooling2dDescriptor); \
__macro(cudnnSetPoolingNdDescriptor); \ __macro(cudnnSetPoolingNdDescriptor); \
__macro(cudnnGetPoolingNdDescriptor); \ __macro(cudnnGetPoolingNdDescriptor); \
__macro(cudnnDestroyFilterDescriptor); \ __macro(cudnnDestroyFilterDescriptor); \
__macro(cudnnCreateConvolutionDescriptor); \ __macro(cudnnCreateConvolutionDescriptor); \
__macro(cudnnCreatePoolingDescriptor); \ __macro(cudnnCreatePoolingDescriptor); \
__macro(cudnnDestroyPoolingDescriptor); \ __macro(cudnnDestroyPoolingDescriptor); \
__macro(cudnnSetConvolution2dDescriptor); \ __macro(cudnnSetConvolution2dDescriptor); \
__macro(cudnnDestroyConvolutionDescriptor); \ __macro(cudnnDestroyConvolutionDescriptor); \
__macro(cudnnSetConvolutionNdDescriptor); \ __macro(cudnnSetConvolutionNdDescriptor); \
__macro(cudnnGetConvolutionNdDescriptor); \ __macro(cudnnGetConvolutionNdDescriptor); \
__macro(cudnnDeriveBNTensorDescriptor); \ __macro(cudnnDeriveBNTensorDescriptor); \
__macro(cudnnCreateSpatialTransformerDescriptor); \ __macro(cudnnCreateSpatialTransformerDescriptor); \
__macro(cudnnSetSpatialTransformerNdDescriptor); \ __macro(cudnnSetSpatialTransformerNdDescriptor); \
__macro(cudnnDestroySpatialTransformerDescriptor); \ __macro(cudnnDestroySpatialTransformerDescriptor); \
__macro(cudnnSpatialTfGridGeneratorForward); \ __macro(cudnnSpatialTfGridGeneratorForward); \
__macro(cudnnSpatialTfGridGeneratorBackward); \ __macro(cudnnSpatialTfGridGeneratorBackward); \
__macro(cudnnSpatialTfSamplerForward); \ __macro(cudnnSpatialTfSamplerForward); \
__macro(cudnnSpatialTfSamplerBackward); \ __macro(cudnnSpatialTfSamplerBackward); \
__macro(cudnnCreate); \ __macro(cudnnCreate); \
__macro(cudnnDestroy); \ __macro(cudnnDestroy); \
__macro(cudnnSetStream); \ __macro(cudnnSetStream); \
__macro(cudnnActivationForward); \ __macro(cudnnActivationForward); \
__macro(cudnnActivationBackward); \ __macro(cudnnActivationBackward); \
__macro(cudnnConvolutionForward); \ __macro(cudnnConvolutionForward); \
__macro(cudnnConvolutionBackwardBias); \ __macro(cudnnConvolutionBackwardBias); \
__macro(cudnnGetConvolutionForwardWorkspaceSize); \ __macro(cudnnGetConvolutionForwardWorkspaceSize); \
__macro(cudnnTransformTensor); \ __macro(cudnnTransformTensor); \
__macro(cudnnPoolingForward); \ __macro(cudnnPoolingForward); \
__macro(cudnnPoolingBackward); \ __macro(cudnnPoolingBackward); \
__macro(cudnnSoftmaxBackward); \ __macro(cudnnSoftmaxBackward); \
__macro(cudnnSoftmaxForward); \ __macro(cudnnSoftmaxForward); \
__macro(cudnnGetVersion); \ __macro(cudnnGetVersion); \
__macro(cudnnFindConvolutionForwardAlgorithmEx); \ __macro(cudnnFindConvolutionForwardAlgorithmEx); \
__macro(cudnnFindConvolutionBackwardFilterAlgorithmEx); \ __macro(cudnnFindConvolutionBackwardFilterAlgorithmEx); \
__macro(cudnnFindConvolutionBackwardFilterAlgorithm); \ __macro(cudnnFindConvolutionBackwardFilterAlgorithm); \
__macro(cudnnFindConvolutionBackwardDataAlgorithmEx); \ __macro(cudnnFindConvolutionBackwardDataAlgorithmEx); \
__macro(cudnnGetErrorString); \ __macro(cudnnGetErrorString); \
__macro(cudnnCreateDropoutDescriptor); \ __macro(cudnnCreateDropoutDescriptor); \
__macro(cudnnDropoutGetStatesSize); \ __macro(cudnnDropoutGetStatesSize); \
__macro(cudnnSetDropoutDescriptor); \ __macro(cudnnSetDropoutDescriptor); \
__macro(cudnnRestoreDropoutDescriptor); \ __macro(cudnnRestoreDropoutDescriptor); \
__macro(cudnnCreateRNNDescriptor); \ __macro(cudnnCreateRNNDescriptor); \
__macro(cudnnGetRNNParamsSize); \ __macro(cudnnGetRNNParamsSize); \
__macro(cudnnGetRNNWorkspaceSize); \ __macro(cudnnGetRNNWorkspaceSize); \
__macro(cudnnGetRNNTrainingReserveSize); \ __macro(cudnnGetRNNTrainingReserveSize); \
__macro(cudnnRNNForwardTraining); \ __macro(cudnnRNNForwardTraining); \
__macro(cudnnRNNBackwardData); \ __macro(cudnnRNNBackwardData); \
__macro(cudnnRNNBackwardWeights); \ __macro(cudnnRNNBackwardWeights); \
__macro(cudnnRNNForwardInference); \ __macro(cudnnRNNForwardInference); \
__macro(cudnnDestroyDropoutDescriptor); \ __macro(cudnnDestroyDropoutDescriptor); \
__macro(cudnnDestroyRNNDescriptor); \ __macro(cudnnDestroyRNNDescriptor); \
__macro(cudnnSetTensorNdDescriptorEx); __macro(cudnnSetTensorNdDescriptorEx); \
__macro(cudnnAddTensor); \
CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) __macro(cudnnConvolutionBackwardData); \
__macro(cudnnConvolutionBackwardFilter); \
#define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \
__macro(cudnnAddTensor); \
__macro(cudnnConvolutionBackwardData); \
__macro(cudnnConvolutionBackwardFilter);
CUDNN_DNN_ROUTINE_EACH_R2(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
// APIs available after R3:
#if CUDNN_VERSION >= 3000
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3(__macro) \
__macro(cudnnGetConvolutionBackwardFilterWorkspaceSize); \ __macro(cudnnGetConvolutionBackwardFilterWorkspaceSize); \
__macro(cudnnGetConvolutionBackwardDataWorkspaceSize); __macro(cudnnGetConvolutionBackwardDataWorkspaceSize); \
CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) __macro(cudnnBatchNormalizationForwardTraining); \
#endif __macro(cudnnBatchNormalizationForwardInference); \
__macro(cudnnBatchNormalizationBackward); \
__macro(cudnnCreateActivationDescriptor); \
__macro(cudnnSetActivationDescriptor); \
__macro(cudnnGetActivationDescriptor); \
__macro(cudnnDestroyActivationDescriptor); \
__macro(cudnnSetRNNDescriptor_v6);
CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
// APIs available after R3: #if CUDNN_VERSION >= 7000 && CUDNN_VERSION < 8000
#if CUDNN_VERSION >= 3000 && CUDNN_VERSION < 8000 #define CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(__macro) \
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(__macro) \
__macro(cudnnGetConvolutionBackwardFilterAlgorithm); \ __macro(cudnnGetConvolutionBackwardFilterAlgorithm); \
__macro(cudnnGetConvolutionForwardAlgorithm); \ __macro(cudnnGetConvolutionForwardAlgorithm); \
__macro(cudnnGetConvolutionBackwardDataAlgorithm); \ __macro(cudnnGetConvolutionBackwardDataAlgorithm); \
__macro(cudnnSetRNNDescriptor); __macro(cudnnSetRNNDescriptor);
CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_AFTER_R7_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
// APIs available after R4:
#if CUDNN_VERSION >= 4007
#define CUDNN_DNN_ROUTINE_EACH_AFTER_R4(__macro) \
__macro(cudnnBatchNormalizationForwardTraining); \
__macro(cudnnBatchNormalizationForwardInference); \
__macro(cudnnBatchNormalizationBackward);
CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
// APIs in R5
#if CUDNN_VERSION >= 5000
#define CUDNN_DNN_ROUTINE_EACH_R5(__macro) \
__macro(cudnnCreateActivationDescriptor); \
__macro(cudnnSetActivationDescriptor); \
__macro(cudnnGetActivationDescriptor); \
__macro(cudnnDestroyActivationDescriptor);
CUDNN_DNN_ROUTINE_EACH_R5(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif
// APIs in R6
#if CUDNN_VERSION >= 6000
#define CUDNN_DNN_ROUTINE_EACH_R6(__macro) __macro(cudnnSetRNNDescriptor_v6);
CUDNN_DNN_ROUTINE_EACH_R6(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)
#endif #endif
#if CUDNN_VERSION >= 7001 #if CUDNN_VERSION >= 7001
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册