Patch commentary (text before the first "diff --git" header is skipped by
"git apply" and "patch").

What this patch changes:
  * cmake/cupti.cmake: the cached default of CUPTI_ROOT becomes
    "${ROCM_PATH}/CUPTI" when WITH_ROCM is set and stays "/usr" otherwise.
  * cmake/hip.cmake: HIP_CXX_FLAGS gets -std=c++14 when WITH_CINN is set and
    -std=c++17 otherwise.
  * paddle/phi/backends/dynload/rccl.{h,cc}: declares and defines dynamic-load
    wrappers for ncclGetVersion (NCCL_VERSION_CODE >= 2304) and for
    ncclRedOpCreatePreMulSum / ncclRedOpDestroy (NCCL_VERSION_CODE >= 21100).
  * paddle/phi/kernels/funcs/blas/blas_impl.hip.h: GEMM(bool transA, ...) maps
    its boolean flags directly to rocblas_operation. A true flag selects
    rocblas_operation_transpose and a false flag selects
    rocblas_operation_none.
  * paddle/phi/kernels/funcs/top_k_function_cuda.h and
    paddle/phi/kernels/gpu/argsort_kernel.cu: adds specializations in
    rocprim::detail, the float_bit_mask ones guarded by a ROCm >= 5.4 check.
    The check compares major and minor together so that releases such as 6.0
    also satisfy it.
  * test/custom_kernel: CUSTOM_DEVICE_ROOT now points at
    ${CMAKE_BINARY_DIR}/test, and custom_kernel_dot.cc / custom_kernel_dot_c.cc
    are renamed from python/paddle/fluid/tests/custom_kernel without content
    changes.

Review notes:
  * NOTE(review): the C++ lines in this copy appear to have lost their
    template argument lists (for example "struct float_bit_mask :
    float_bit_mask {};"). They are reproduced as found; restore them from the
    upstream change before applying.
  * NOTE(review): indentation of context lines was reconstructed and may not
    match the target files byte-for-byte.
  * NOTE(review): the "index" hashes are the ones recorded with the original
    patch and do not reflect the two corrected hunks (blas_impl.hip.h and the
    ROCm version guards).
  * NOTE(review): CUSTOM_DEVICE_ROOT no longer ends in a custom_kernel
    directory; confirm this matches where the test build places its outputs.

diff --git a/cmake/cupti.cmake b/cmake/cupti.cmake
index a6bab6a39512a347b9ca284887353523d2d77e76..54905d5842fecab4821d62b983b761632f45d06f 100644
--- a/cmake/cupti.cmake
+++ b/cmake/cupti.cmake
@@ -2,9 +2,15 @@ if(NOT WITH_GPU AND NOT WITH_ROCM)
   return()
 endif()
 
-set(CUPTI_ROOT
-    "/usr"
-    CACHE PATH "CUPTI ROOT")
+if(WITH_ROCM)
+  set(CUPTI_ROOT
+      "${ROCM_PATH}/CUPTI"
+      CACHE PATH "CUPTI ROOT")
+else()
+  set(CUPTI_ROOT
+      "/usr"
+      CACHE PATH "CUPTI ROOT")
+endif()
 find_path(
   CUPTI_INCLUDE_DIR cupti.h
   PATHS ${CUPTI_ROOT}
diff --git a/cmake/hip.cmake b/cmake/hip.cmake
index 44e9e2ee8ccafdbd2156d7daa89e4c45468acc86..c5b76dd9f3f28fc3e77dd7b886120b08e5d4e8da 100644
--- a/cmake/hip.cmake
+++ b/cmake/hip.cmake
@@ -106,7 +106,11 @@ list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
 list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
 list(APPEND HIP_CXX_FLAGS -Wno-pass-failed)
 list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
-list(APPEND HIP_CXX_FLAGS -std=c++14)
+if(WITH_CINN)
+  list(APPEND HIP_CXX_FLAGS -std=c++14)
+else()
+  list(APPEND HIP_CXX_FLAGS -std=c++17)
+endif()
 
 if(CMAKE_BUILD_TYPE MATCHES Debug)
   list(APPEND HIP_CXX_FLAGS -g2)
diff --git a/paddle/phi/backends/dynload/rccl.cc b/paddle/phi/backends/dynload/rccl.cc
index 932c44c34c629e6de4a233fda586de9a805e739e..95e171842527b2b87c245e846846f6fbe2fc8ad3 100644
--- a/paddle/phi/backends/dynload/rccl.cc
+++ b/paddle/phi/backends/dynload/rccl.cc
@@ -28,9 +28,17 @@ RCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
 RCCL_RAND_ROUTINE_EACH_AFTER_2212(DEFINE_WRAP)
 #endif
 
+#if NCCL_VERSION_CODE >= 2304
+RCCL_RAND_ROUTINE_EACH_AFTER_2304(DEFINE_WRAP)
+#endif
+
 #if NCCL_VERSION_CODE >= 2703
 RCCL_RAND_ROUTINE_EACH_AFTER_2703(DEFINE_WRAP)
 #endif
 
+#if NCCL_VERSION_CODE >= 21100
+RCCL_RAND_ROUTINE_EACH_AFTER_21100(DEFINE_WRAP)
+#endif
+
 }  // namespace dynload
 }  // namespace phi
diff --git a/paddle/phi/backends/dynload/rccl.h b/paddle/phi/backends/dynload/rccl.h
index 2da35dc2df2db32cbe911e2a3302cafbc92e848e..9232d387d2d19de4656023bf4251f4603eab11bb 100644
--- a/paddle/phi/backends/dynload/rccl.h
+++ b/paddle/phi/backends/dynload/rccl.h
@@ -64,6 +64,11 @@ RCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
 RCCL_RAND_ROUTINE_EACH_AFTER_2212(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
 #endif
 
+#if NCCL_VERSION_CODE >= 2304
+#define RCCL_RAND_ROUTINE_EACH_AFTER_2304(__macro) __macro(ncclGetVersion);
+RCCL_RAND_ROUTINE_EACH_AFTER_2304(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
+#endif
+
 #if NCCL_VERSION_CODE >= 2703
 #define RCCL_RAND_ROUTINE_EACH_AFTER_2703(__macro) \
   __macro(ncclSend); \
@@ -71,5 +76,11 @@ RCCL_RAND_ROUTINE_EACH_AFTER_2212(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
 RCCL_RAND_ROUTINE_EACH_AFTER_2703(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
 #endif
 
+#if NCCL_VERSION_CODE >= 21100
+#define RCCL_RAND_ROUTINE_EACH_AFTER_21100(__macro) \
+  __macro(ncclRedOpCreatePreMulSum); \
+  __macro(ncclRedOpDestroy);
+RCCL_RAND_ROUTINE_EACH_AFTER_21100(DECLARE_DYNAMIC_LOAD_RCCL_WRAP)
+#endif
 }  // namespace dynload
 }  // namespace phi
diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.hip.h b/paddle/phi/kernels/funcs/blas/blas_impl.hip.h
index bb02242e2db7218adc6f1d5904f26e48664e6014..6aa41e4f4a2b6c8345a76323d346fc9fe0b2c66c 100644
--- a/paddle/phi/kernels/funcs/blas/blas_impl.hip.h
+++ b/paddle/phi/kernels/funcs/blas/blas_impl.hip.h
@@ -999,12 +999,10 @@ inline void Blas::GEMM(bool transA,
                        int ldc) const {
   // Note that cublas follows fortran order, so the order is different from
   // the cblas convention.
-  rocblas_operation cuTransA = (transA == CblasNoTrans)
-                                   ? rocblas_operation_none
-                                   : rocblas_operation_transpose;
-  rocblas_operation cuTransB = (transB == CblasNoTrans)
-                                   ? rocblas_operation_none
-                                   : rocblas_operation_transpose;
+  rocblas_operation cuTransA =
+      transA ? rocblas_operation_transpose : rocblas_operation_none;
+  rocblas_operation cuTransB =
+      transB ? rocblas_operation_transpose : rocblas_operation_none;
   PADDLE_ENFORCE_GE(
       context_.GetComputeCapability(),
       80,
diff --git a/paddle/phi/kernels/funcs/top_k_function_cuda.h b/paddle/phi/kernels/funcs/top_k_function_cuda.h
index 26374ca36007a3103a187a35754e025f56d6baed..4b89bdb5b1b74822d485d66e2e5ebc4c8637cf2c 100644
--- a/paddle/phi/kernels/funcs/top_k_function_cuda.h
+++ b/paddle/phi/kernels/funcs/top_k_function_cuda.h
@@ -54,6 +54,15 @@ struct radix_key_codec_base
 template <>
 struct radix_key_codec_base
     : radix_key_codec_integral {};
+
+#if (ROCM_VERSION_MAJOR * 100 + ROCM_VERSION_MINOR) >= 504
+template <>
+struct float_bit_mask : float_bit_mask {};
+
+template <>
+struct float_bit_mask
+    : float_bit_mask {};
+#endif
 }  // namespace detail
 }  // namespace rocprim
 namespace cub = hipcub;
diff --git a/paddle/phi/kernels/gpu/argsort_kernel.cu b/paddle/phi/kernels/gpu/argsort_kernel.cu
index 5cf3f2894a36c862d173f714d05fb1b677b9be84..5942ffbc4289932227abc5d011e3a35d14231a73 100644
--- a/paddle/phi/kernels/gpu/argsort_kernel.cu
+++ b/paddle/phi/kernels/gpu/argsort_kernel.cu
@@ -40,6 +40,19 @@ namespace detail {
 template <>
 struct radix_key_codec_base
     : radix_key_codec_integral {};
+
+template <>
+struct radix_key_codec_base
+    : radix_key_codec_integral {};
+
+#if (ROCM_VERSION_MAJOR * 100 + ROCM_VERSION_MINOR) >= 504
+template <>
+struct float_bit_mask : float_bit_mask {};
+
+template <>
+struct float_bit_mask
+    : float_bit_mask {};
+#endif
 }  // namespace detail
 }  // namespace rocprim
 #else
diff --git a/test/custom_kernel/CMakeLists.txt b/test/custom_kernel/CMakeLists.txt
index af700c22038e3cd5044ba1bca7ad46bc1fce3d02..5a710848d00bddd2fe63fc8dcd863d400bde32c1 100644
--- a/test/custom_kernel/CMakeLists.txt
+++ b/test/custom_kernel/CMakeLists.txt
@@ -7,8 +7,7 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
 set(CUSTOM_ENVS
     PADDLE_SOURCE_DIR=${PADDLE_SOURCE_DIR}
     PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}
-    CUSTOM_DEVICE_ROOT=${CMAKE_BINARY_DIR}/python/paddle/fluid/tests/custom_kernel
-)
+    CUSTOM_DEVICE_ROOT=${CMAKE_BINARY_DIR}/test)
 
 foreach(TEST_OP ${TEST_OPS})
   py_test(${TEST_OP} SRCS ${TEST_OP}.py ENVS ${CUSTOM_ENVS})
diff --git a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot.cc b/test/custom_kernel/custom_kernel_dot.cc
similarity index 100%
rename from python/paddle/fluid/tests/custom_kernel/custom_kernel_dot.cc
rename to test/custom_kernel/custom_kernel_dot.cc
diff --git a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c.cc b/test/custom_kernel/custom_kernel_dot_c.cc
similarity index 100%
rename from python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c.cc
rename to test/custom_kernel/custom_kernel_dot_c.cc