diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 9c1bd52e7fb7dfad5f6dc36d850468bf69ee92cd..851520328f3ce74969dfc92ef73f1ba17ce482d3 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -93,8 +93,8 @@ if(WITH_GPU) FIND_PACKAGE(CUDA REQUIRED) - if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 7) - message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile") + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 10.1) + message(FATAL_ERROR "Paddle needs CUDA >= 10.1 to compile") endif() if(NOT CUDNN_FOUND) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 05b559520744293ee225b103cf5b5990e60e009d..7f2addb02d36ddf85cd08542cc5baab31d495bc5 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -6,15 +6,9 @@ endif() if (WITH_NV_JETSON) add_definitions(-DWITH_NV_JETSON) set(paddle_known_gpu_archs "53 62 72") - set(paddle_known_gpu_archs7 "53") - set(paddle_known_gpu_archs8 "53 62") - set(paddle_known_gpu_archs9 "53 62") set(paddle_known_gpu_archs10 "53 62 72") else() - set(paddle_known_gpu_archs "30 35 50 52 60 61 70") - set(paddle_known_gpu_archs7 "30 35 50 52") - set(paddle_known_gpu_archs8 "30 35 50 52 60 61") - set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70") + set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80") set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75") set(paddle_known_gpu_archs11 "52 60 61 70 75 80") endif() @@ -160,25 +154,7 @@ function(select_nvcc_arch_flags out_variable) endfunction() message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION}) -if (${CMAKE_CUDA_COMPILER_VERSION} LESS 7.0) - set(paddle_known_gpu_archs ${paddle_known_gpu_archs}) -elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 8.0) # CUDA 7.x - set(paddle_known_gpu_archs ${paddle_known_gpu_archs7}) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") -elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 9.0) # CUDA 8.x - set(paddle_known_gpu_archs ${paddle_known_gpu_archs8}) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") - # CUDA 8 may complain that sm_20 is no longer supported. Suppress the - # warning for now. - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") -elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) # CUDA 9.x - set(paddle_known_gpu_archs ${paddle_known_gpu_archs9}) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets") -elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x +if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x set(paddle_known_gpu_archs ${paddle_known_gpu_archs10}) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__") diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.cu.h b/paddle/fluid/operators/elementwise/elementwise_op_function.cu.h index 1121d0ef68ce2f498c8e945dbf2e65102ebae824..6d5dcc4dd6f5559dfcc9c9d2e87f167fed83d0cb 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_function.cu.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_function.cu.h @@ -34,10 +34,6 @@ limitations under the License. */ #endif #endif // PADDLE_WITH_HIP -#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000 -#define __h2div h2div -#endif - #define DIV_ERROR_INFO \ "InvalidArgumentError: Integer division by zero encountered in divide. " \ "Please check.\n" diff --git a/paddle/fluid/platform/cuda_device_function.h b/paddle/fluid/platform/cuda_device_function.h index 4f504b414de4a7e8413fcb8f853c07d55ecff493..dde9531e59144218c91d789a8fe668d3fffb70f2 100644 --- a/paddle/fluid/platform/cuda_device_function.h +++ b/paddle/fluid/platform/cuda_device_function.h @@ -26,14 +26,10 @@ namespace platform { #ifdef PADDLE_WITH_HIP #define CREATE_SHFL_MASK(mask, predicate) mask = __ballot((predicate)) #else -#if CUDA_VERSION < 9000 -#define CREATE_SHFL_MASK(mask, predicate) mask = 0u; -#else #define FULL_WARP_MASK 0xFFFFFFFF #define CREATE_SHFL_MASK(mask, predicate) \ mask = __ballot_sync(FULL_WARP_MASK, (predicate)) #endif -#endif inline static int RoundToPowerOfTwo(int dim) { if (dim > 512) { @@ -69,7 +65,7 @@ template __forceinline__ __device__ T CudaShuffleDownSync(unsigned mask, T val, int delta, int width = warpSize) { -#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000 +#if defined(PADDLE_WITH_HIP) return __shfl_down(val, delta, width); #else return __shfl_down_sync(mask, val, static_cast(delta), width); @@ -79,7 +75,7 @@ __forceinline__ __device__ T CudaShuffleDownSync(unsigned mask, T val, template __forceinline__ __device__ T CudaShuffleXorSync(unsigned mask, T val, int width = warpSize) { -#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000 +#if defined(PADDLE_WITH_HIP) return __shfl_xor(val, width); #else return __shfl_xor_sync(mask, val, width); @@ -87,7 +83,7 @@ __forceinline__ __device__ T CudaShuffleXorSync(unsigned mask, T val, } // CUDA 9.0 have native compatible float16 shfl_down -#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000 +#if defined(PADDLE_WITH_HIP) template <> __forceinline__ __device__ float16 CudaShuffleDownSync(unsigned mask, float16 val, int delta, @@ -170,7 +166,7 @@ __forceinline__ __device__ paddle::platform::complex128 CudaShuffleXorSync( template __forceinline__ __device__ T CudaShuffleSync(unsigned mask, T val, int src_line, int width = 32) { -#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000 +#if defined(PADDLE_WITH_HIP) return __shfl(val, src_line, width); #else return __shfl_sync(mask, val, src_line, width); diff --git a/paddle/fluid/platform/cuda_helper.h b/paddle/fluid/platform/cuda_helper.h index fa4ef3f8c124e407a2494828d390e2c8d6c2c8ca..202be920c559535eeea813b3b27dd3fa48011048 100644 --- a/paddle/fluid/platform/cuda_helper.h +++ b/paddle/fluid/platform/cuda_helper.h @@ -25,10 +25,6 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/macros.h" -#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000 -enum cublasMath_t { CUBLAS_DEFAULT_MATH = 0 }; -#endif - namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/float16_test.cu b/paddle/fluid/platform/float16_test.cu index d181660e311960544f798f8b9e22701147c65aa5..75e35d398c27e718f5f1e568b2082fb3439aa233 100644 --- a/paddle/fluid/platform/float16_test.cu +++ b/paddle/fluid/platform/float16_test.cu @@ -197,8 +197,7 @@ limitations under the License. */ namespace paddle { namespace platform { -#if defined(PADDLE_WITH_HIP) || \ - (defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000) +#if defined(PADDLE_WITH_HIP) ARITHMETIC_KERNEL(Add, +) ARITHMETIC_KERNEL(Sub, -) ARITHMETIC_KERNEL(Mul, *)