Unverified commit ea738dda, authored by tianshuo78520a, committed by GitHub

delete cuda9 code (#31883)

Parent: e973bd73
...
@@ -93,8 +93,8 @@ if(WITH_GPU)
   FIND_PACKAGE(CUDA REQUIRED)
-  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 7)
-    message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
+  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 10.1)
+    message(FATAL_ERROR "Paddle needs CUDA >= 10.1 to compile")
   endif()
   if(NOT CUDNN_FOUND)
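
Note: the build floor jumps from CUDA 7.0 to 10.1 in one step. As a hedged illustration (not part of the patch), C++ translation units can mirror the same floor at compile time; CUDA_VERSION encodes major*1000 + minor*10, so 10.1 is 10010:

    #include <cuda.h>

    // Hypothetical guard mirroring the new CMake check above.
    #if defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 10010
    #error "CUDA >= 10.1 is required, matching the build-time check."
    #endif
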
...
@@ -6,15 +6,9 @@ endif()
 if (WITH_NV_JETSON)
   add_definitions(-DWITH_NV_JETSON)
   set(paddle_known_gpu_archs "53 62 72")
-  set(paddle_known_gpu_archs7 "53")
-  set(paddle_known_gpu_archs8 "53 62")
-  set(paddle_known_gpu_archs9 "53 62")
   set(paddle_known_gpu_archs10 "53 62 72")
 else()
-  set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
-  set(paddle_known_gpu_archs7 "30 35 50 52")
-  set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
-  set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
+  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80")
   set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
   set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
 endif()
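
Note: the CUDA 7/8/9 arch lists disappear and the default list drops sm_30 while adding sm_75 and sm_80. A minimal sketch (hypothetical kernel, not from this patch) of how one kernel body can still serve both ends of the new list via __CUDA_ARCH__: native fp16 math on sm_53+, a float round-trip on the older sm_35/50/52 entries:

    #include <cuda_fp16.h>

    __global__ void Twice(__half* x, int n) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i >= n) return;
    #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
      x[i] = __hadd(x[i], x[i]);                       // native fp16 add
    #else
      x[i] = __float2half(2.f * __half2float(x[i]));   // fallback for sm_35/50/52
    #endif
    }
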
...
@@ -160,25 +154,7 @@ function(select_nvcc_arch_flags out_variable)
 endfunction()

 message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})
-if (${CMAKE_CUDA_COMPILER_VERSION} LESS 7.0)
-  set(paddle_known_gpu_archs ${paddle_known_gpu_archs})
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 8.0) # CUDA 7.x
-  set(paddle_known_gpu_archs ${paddle_known_gpu_archs7})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 9.0) # CUDA 8.x
-  set(paddle_known_gpu_archs ${paddle_known_gpu_archs8})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
-  # CUDA 8 may complain that sm_20 is no longer supported. Suppress the
-  # warning for now.
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) # CUDA 9.x
-  set(paddle_known_gpu_archs ${paddle_known_gpu_archs9})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
+if (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
   set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
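
Note: the old five-way version ladder collapses to the CUDA 10.x case. As a hedged, self-contained sketch (not part of the patch), the same version numbers the message(STATUS ...) line reports at configure time can also be queried at run time; both APIs return major*1000 + minor*10:

    #include <cstdio>
    #include <cuda_runtime.h>

    int main() {
      int runtime_ver = 0, driver_ver = 0;
      cudaRuntimeGetVersion(&runtime_ver);  // e.g. 10010 for CUDA 10.1
      cudaDriverGetVersion(&driver_ver);
      std::printf("CUDA runtime %d, driver %d\n", runtime_ver, driver_ver);
      return 0;
    }
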
...
@@ -34,10 +34,6 @@ limitations under the License. */
 #endif
 #endif  // PADDLE_WITH_HIP

-#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000
-#define __h2div h2div
-#endif
-
 #define DIV_ERROR_INFO                                                     \
   "InvalidArgumentError: Integer division by zero encountered in divide. " \
   "Please check.\n"
...
@@ -26,14 +26,10 @@ namespace platform {
 #ifdef PADDLE_WITH_HIP
 #define CREATE_SHFL_MASK(mask, predicate) mask = __ballot((predicate))
 #else
-#if CUDA_VERSION < 9000
-#define CREATE_SHFL_MASK(mask, predicate) mask = 0u;
-#else
 #define FULL_WARP_MASK 0xFFFFFFFF
 #define CREATE_SHFL_MASK(mask, predicate) \
   mask = __ballot_sync(FULL_WARP_MASK, (predicate))
-#endif
 #endif

 inline static int RoundToPowerOfTwo(int dim) {
   if (dim > 512) {
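
Note: on the CUDA path the mask is now always a real __ballot_sync ballot rather than the old 0u placeholder. A hedged usage sketch (hypothetical kernel) of how the macro typically feeds the *_sync shuffles:

    __global__ void CountPositive(const float* x, int* cnt, int n) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      bool pred = (i < n) && (x[i] > 0.f);
      unsigned mask = 0u;
      CREATE_SHFL_MASK(mask, pred);  // __ballot_sync(FULL_WARP_MASK, pred) on CUDA
      if (threadIdx.x % 32 == 0) atomicAdd(cnt, __popc(mask));  // set bits = voters
    }
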
...
@@ -69,7 +65,7 @@ template <typename T>
 __forceinline__ __device__ T CudaShuffleDownSync(unsigned mask, T val,
                                                  int delta,
                                                  int width = warpSize) {
-#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000
+#if defined(PADDLE_WITH_HIP)
   return __shfl_down(val, delta, width);
 #else
   return __shfl_down_sync(mask, val, static_cast<unsigned>(delta), width);
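
Note: with the CUDA < 9 escape hatch removed, CUDA builds always take the __shfl_down_sync path. A typical caller, sketched here with an illustrative helper name: a warp tree reduction that leaves the sum in lane 0:

    __forceinline__ __device__ float WarpReduceSum(float val) {
      unsigned mask = 0u;
      CREATE_SHFL_MASK(mask, true);
      for (int offset = warpSize / 2; offset > 0; offset >>= 1)
        val += CudaShuffleDownSync(mask, val, offset);
      return val;  // lane 0 now holds the warp-wide sum
    }
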
...
@@ -79,7 +75,7 @@ __forceinline__ __device__ T CudaShuffleDownSync(unsigned mask, T val,
 template <typename T>
 __forceinline__ __device__ T CudaShuffleXorSync(unsigned mask, T val,
                                                 int width = warpSize) {
-#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000
+#if defined(PADDLE_WITH_HIP)
   return __shfl_xor(val, width);
 #else
   return __shfl_xor_sync(mask, val, width);
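
Note: same simplification for the XOR variant. A sketch (illustrative helper name) of the usual butterfly pattern, where every lane, not just lane 0, ends with the result:

    // The wrapper forwards its third argument into __shfl_xor_sync's lane-mask
    // slot, so it acts as the XOR distance here.
    __forceinline__ __device__ float WarpAllReduceMax(float val) {
      unsigned mask = 0u;
      CREATE_SHFL_MASK(mask, true);
      for (int step = warpSize / 2; step > 0; step >>= 1)
        val = fmaxf(val, CudaShuffleXorSync(mask, val, step));
      return val;
    }
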
...
@@ -87,7 +83,7 @@ __forceinline__ __device__ T CudaShuffleXorSync(unsigned mask, T val,
 }

 // CUDA 9.0 have native compatible float16 shfl_down
-#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000
+#if defined(PADDLE_WITH_HIP)
 template <>
 __forceinline__ __device__ float16 CudaShuffleDownSync(unsigned mask,
                                                        float16 val, int delta,
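
Note: the float16 specializations now survive only for the HIP build, since cuda_fp16.h has shipped native __half overloads of the *_sync shuffles since CUDA 9. A hedged sketch (hypothetical kernel):

    #include <cuda_fp16.h>

    __global__ void ShiftHalf(__half* data) {
      __half v = data[threadIdx.x];
      v = __shfl_down_sync(0xFFFFFFFFu, v, 1u);  // native __half overload
      data[threadIdx.x] = v;
    }
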
...
@@ -170,7 +166,7 @@ __forceinline__ __device__ paddle::platform::complex128 CudaShuffleXorSync(
 template <typename T>
 __forceinline__ __device__ T CudaShuffleSync(unsigned mask, T val, int src_line,
                                              int width = 32) {
-#if defined(PADDLE_WITH_HIP) || CUDA_VERSION < 9000
+#if defined(PADDLE_WITH_HIP)
   return __shfl(val, src_line, width);
 #else
   return __shfl_sync(mask, val, src_line, width);
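
Note: the plain shuffle reads a single source lane, i.e. a warp-wide broadcast. Minimal sketch (illustrative helper name):

    __forceinline__ __device__ float WarpBroadcast(float val, int src_lane) {
      unsigned mask = 0u;
      CREATE_SHFL_MASK(mask, true);
      return CudaShuffleSync(mask, val, src_lane);  // all lanes read src_lane's val
    }
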
...
@@ -25,10 +25,6 @@
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/macros.h"

-#if defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000
-enum cublasMath_t { CUBLAS_DEFAULT_MATH = 0 };
-#endif
-
 namespace paddle {
 namespace platform {
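
Note: cuBLAS has defined cublasMath_t itself since CUDA 9, so the local stub enum goes away. A hedged sketch of the kind of call that relies on the real type (function name is illustrative):

    #include <cublas_v2.h>

    // cublasMath_t / CUBLAS_DEFAULT_MATH now always come from the cuBLAS headers.
    void UseDefaultMath(cublasHandle_t handle) {
      cublasSetMathMode(handle, CUBLAS_DEFAULT_MATH);
    }
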
...
@@ -197,8 +197,7 @@ limitations under the License. */
 namespace paddle {
 namespace platform {

-#if defined(PADDLE_WITH_HIP) || \
-    (defined(PADDLE_WITH_CUDA) && CUDA_VERSION < 9000)
+#if defined(PADDLE_WITH_HIP)
 ARITHMETIC_KERNEL(Add, +)
 ARITHMETIC_KERNEL(Sub, -)
 ARITHMETIC_KERNEL(Mul, *)
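
Note: these fallback arithmetic kernels are now compiled only for HIP; CUDA >= 9 builds rely on fp16 support elsewhere. A hedged, portable sketch (hypothetical kernel, not the file's actual ARITHMETIC_KERNEL macro) of the same element-wise add via a float round-trip, valid on every arch in the supported list:

    #include <cuda_fp16.h>

    __global__ void AddHalf(const __half* in1, const __half* in2, __half* out) {
      out[0] = __float2half(__half2float(in1[0]) + __half2float(in2[0]));
    }
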
...