Unverified · Commit ea738dda · authored by tianshuo78520a · committed by GitHub

delete cuda9 code (#31883)

Parent: e973bd73
```diff
@@ -93,8 +93,8 @@ if(WITH_GPU)
     FIND_PACKAGE(CUDA REQUIRED)
-    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 7)
-        message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
+    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 10.1)
+        message(FATAL_ERROR "Paddle needs CUDA >= 10.1 to compile")
     endif()
     if(NOT CUDNN_FOUND)
```
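The build-time floor moves from CUDA 7.0 to 10.1. For reference, the same policy can also be asserted at compile time in source. A minimal sketch, not part of this commit, assuming only the standard `CUDA_VERSION` macro from `<cuda.h>`, which encodes major * 1000 + minor * 10 (so CUDA 10.1 is 10010):

```cpp
// Sketch (not from this patch): fail the build early if the toolkit is
// older than the new CUDA 10.1 floor.
#include <cuda.h>

#if CUDA_VERSION < 10010  // 10 * 1000 + 1 * 10
#error "CUDA >= 10.1 is required"
#endif
```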
```diff
@@ -6,15 +6,9 @@ endif()
 if (WITH_NV_JETSON)
   add_definitions(-DWITH_NV_JETSON)
   set(paddle_known_gpu_archs "53 62 72")
-  set(paddle_known_gpu_archs7 "53")
-  set(paddle_known_gpu_archs8 "53 62")
-  set(paddle_known_gpu_archs9 "53 62")
   set(paddle_known_gpu_archs10 "53 62 72")
 else()
-  set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
-  set(paddle_known_gpu_archs7 "30 35 50 52")
-  set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
-  set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
+  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80")
   set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
   set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
 endif()
```
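Each entry in these lists is a compute capability ("35" means sm_35, "80" means sm_80), so deleting the `archs7`/`archs8`/`archs9` lists also retires Kepler sm_30 from the default list. To see which entry a given device maps to, a small runtime probe suffices; this sketch uses only the standard CUDA runtime API and is not from the patch:

```cpp
// Sketch: print the current device's compute capability, i.e. the "XY"
// number that appears in paddle_known_gpu_archs.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
  cudaDeviceProp prop;
  if (cudaGetDeviceProperties(&prop, /*device=*/0) != cudaSuccess) {
    std::fprintf(stderr, "no CUDA device visible\n");
    return 1;
  }
  std::printf("sm_%d%d\n", prop.major, prop.minor);
  return 0;
}
```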
```diff
@@ -160,25 +154,7 @@ function(select_nvcc_arch_flags out_variable)
 endfunction()
 
 message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})
-if (${CMAKE_CUDA_COMPILER_VERSION} LESS 7.0)
-  set(paddle_known_gpu_archs ${paddle_known_gpu_archs})
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 8.0) # CUDA 7.x
-  set(paddle_known_gpu_archs ${paddle_known_gpu_archs7})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 9.0) # CUDA 8.x
-  set(paddle_known_gpu_archs ${paddle_known_gpu_archs8})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
-  # CUDA 8 may complain that sm_20 is no longer supported. Suppress the
-  # warning for now.
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) # CUDA 9.x
-  set(paddle_known_gpu_archs ${paddle_known_gpu_archs9})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
+if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
```
```diff
@@ -34,10 +34,6 @@ limitations under the License. */
 #endif
 #endif  // PADDLE_WITH_HIP
 
-#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000
-#define __h2div h2div
-#endif
-
 #define DIV_ERROR_INFO                                                      \
   "InvalidArgumentError: Integer division by zero encountered in divide. " \
   "Please check.\n"
```
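The deleted block aliased `__h2div` to the pre-CUDA-9 spelling `h2div`; with the toolkit floor now at 10.1, the `__h2div` intrinsic from `<cuda_fp16.h>` is always available. A hedged sketch of its use, with an invented kernel name:

```cpp
// Sketch: element-wise half2 division with the CUDA 9+ intrinsic __h2div
// (needs a device with compute capability >= 5.3).
#include <cuda_fp16.h>

__global__ void HalfDivide(const __half2* x, const __half2* y, __half2* out,
                           int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = __h2div(x[i], y[i]);  // two fp16 divisions per element
  }
}
```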
```diff
@@ -26,14 +26,10 @@ namespace platform {
 #ifdef PADDLE_WITH_HIP
 #define CREATE_SHFL_MASK(mask, predicate) mask = __ballot((predicate))
 #else
-#if CUDA_VERSION < 9000
-#define CREATE_SHFL_MASK(mask, predicate) mask = 0u;
-#else
 #define FULL_WARP_MASK 0xFFFFFFFF
 #define CREATE_SHFL_MASK(mask, predicate) \
   mask = __ballot_sync(FULL_WARP_MASK, (predicate))
 #endif
-#endif
 
 inline static int RoundToPowerOfTwo(int dim) {
   if (dim > 512) {
@@ -69,7 +65,7 @@ template <typename T>
 __forceinline__ __device__ T CudaShuffleDownSync(unsigned mask, T val,
                                                  int delta,
                                                  int width = warpSize) {
-#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000
+#if defined(PADDLE_WITH_HIP)
   return __shfl_down(val, delta, width);
 #else
   return __shfl_down_sync(mask, val, static_cast<unsigned>(delta), width);
@@ -79,7 +75,7 @@ __forceinline__ __device__ T CudaShuffleDownSync(unsigned mask, T val,
 template <typename T>
 __forceinline__ __device__ T CudaShuffleXorSync(unsigned mask, T val,
                                                 int width = warpSize) {
-#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000
+#if defined(PADDLE_WITH_HIP)
   return __shfl_xor(val, width);
 #else
   return __shfl_xor_sync(mask, val, width);
@@ -87,7 +83,7 @@ __forceinline__ __device__ T CudaShuffleXorSync(unsigned mask, T val,
 }
 
 // CUDA 9.0 have native compatible float16 shfl_down
-#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000
+#if defined(PADDLE_WITH_HIP)
 template <>
 __forceinline__ __device__ float16 CudaShuffleDownSync(unsigned mask,
                                                        float16 val, int delta,
@@ -170,7 +166,7 @@ __forceinline__ __device__ paddle::platform::complex128 CudaShuffleXorSync(
 template <typename T>
 __forceinline__ __device__ T CudaShuffleSync(unsigned mask, T val, int src_line,
                                              int width = 32) {
-#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000
+#if defined(PADDLE_WITH_HIP)
   return __shfl(val, src_line, width);
 #else
   return __shfl_sync(mask, val, src_line, width);
```
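CUDA 9 replaced the implicitly warp-synchronous `__shfl*` intrinsics with `_sync` variants that take an explicit member mask, which is why the `CUDA_VERSION < 9000` branches (and the `mask = 0u` stand-in for `CREATE_SHFL_MASK`) can go. A minimal sketch of the pattern these wrappers support, assuming all 32 lanes of the warp participate; the helper name is invented:

```cpp
// Sketch: warp-level sum using the CUDA 9+ *_sync shuffle intrinsics.
__device__ float WarpSum(float val) {
  const unsigned kFullMask = 0xFFFFFFFFu;  // every lane takes part
  for (int offset = 16; offset > 0; offset >>= 1) {
    val += __shfl_down_sync(kFullMask, val, offset);
  }
  return val;  // lane 0 now holds the sum over the warp
}
```

When only a subset of lanes is active, the mask is built first with `__ballot_sync`, which is exactly what the retained `CREATE_SHFL_MASK` macro does.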
```diff
@@ -25,10 +25,6 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/macros.h"
 
-#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000
-enum cublasMath_t { CUBLAS_DEFAULT_MATH = 0 };
-#endif
-
 namespace paddle {
 namespace platform {
 
```
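`cublasMath_t` and `cublasSetMathMode` entered cuBLAS with CUDA 9, so the stub enum that let pre-9 builds compile is dead code under the new floor. A sketch of the real API, with an invented wrapper name:

```cpp
// Sketch: select the default math mode on a cuBLAS handle (CUDA 9+ API).
#include <cublas_v2.h>

void UseDefaultMath(cublasHandle_t handle) {
  // CUBLAS_DEFAULT_MATH: no implicit down-conversion to Tensor Core paths.
  cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH);
}
```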
```diff
@@ -197,8 +197,7 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
 
-#if defined(PADDLE_WITH_HIP) || \
-    (defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000)
+#if defined(PADDLE_WITH_HIP)
 ARITHMETIC_KERNEL(Add, +)
 ARITHMETIC_KERNEL(Sub, -)
 ARITHMETIC_KERNEL(Mul, *)
```
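With CUDA 9 guaranteed, these float16 arithmetic macros keep their fallback expansion only for HIP; on the CUDA side, sm_53+ devices can use the native half intrinsics directly. A sketch of that native path, with an invented kernel name, not taken from the patch:

```cpp
// Sketch: native fp16 addition via __hadd from <cuda_fp16.h> (sm_53+),
// the kind of path the removed CUDA_VERSION < 9000 branch worked around.
#include <cuda_fp16.h>

__global__ void HalfAdd(const __half* x, const __half* y, __half* out, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    out[i] = __hadd(x[i], y[i]);
  }
}
```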