From 1885d55a47cb237f45932a559a9cbddcd6651d92 Mon Sep 17 00:00:00 2001 From: zlsh80826 <rewang@nvidia.com> Date: Mon, 16 Jan 2023 22:03:13 +0800 Subject: [PATCH] CUDA12.0 integration (#49539) * Update warpctc for cuda-12 * Stop calling the deprecated cudaProfilerInitialize for CUDA >= 11.0 * Use CUSPARSE_SPMV_ALG_DEFAULT in place of the deprecated CUSPARSE_MV_ALG_DEFAULT for CUDA_VERSION >= 11040 * Add the missing thrust headers --- cmake/external/warpctc.cmake | 2 +- paddle/fluid/platform/device/gpu/cuda/cuda_profiler.cc | 2 ++ paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h | 8 ++++++++ paddle/phi/kernels/gpu/send_u_recv_kernel.cu | 1 + paddle/phi/kernels/gpu/send_ue_recv_kernel.cu | 1 + paddle/phi/kernels/gpu/unique_kernel.cu | 1 + paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu | 3 +++ 7 files changed, 17 insertions(+), 1 deletion(-) diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index c7a4e1d99bf..aa8ab62d7ae 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -23,7 +23,7 @@ set(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) # in case of low internet speed #set(WARPCTC_REPOSITORY https://gitee.com/tianjianhe/warp-ctc.git) set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git) -set(WARPCTC_TAG 37ece0e1bbe8a0019a63ac7e6462c36591c66a5b) +set(WARPCTC_TAG bdc2b4550453e0ef2d3b5190f9c6103a84eff184) set(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" diff --git a/paddle/fluid/platform/device/gpu/cuda/cuda_profiler.cc b/paddle/fluid/platform/device/gpu/cuda/cuda_profiler.cc index cebb36cbc64..a49d9013fb6 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cuda_profiler.cc +++ b/paddle/fluid/platform/device/gpu/cuda/cuda_profiler.cc @@ -20,6 +20,7 @@ namespace platform { void CudaProfilerInit(const std::string& output_file, const std::string& output_mode, const std::string& config_file) { +#if CUDA_VERSION < 11000 PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv", platform::errors::InvalidArgument( "Unsupported cuda profiler output mode, expect 
`kvp` or " @@ -28,6 +29,7 @@ void CudaProfilerInit(const std::string& output_file, cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair; PADDLE_ENFORCE_GPU_SUCCESS( cudaProfilerInitialize(config_file.c_str(), output_file.c_str(), mode)); +#endif } void CudaProfilerStart() { PADDLE_ENFORCE_GPU_SUCCESS(cudaProfilerStart()); } diff --git a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h index 81c4faeb818..bf4553f3ab7 100644 --- a/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h +++ b/paddle/phi/kernels/funcs/sparse/sparse_blas_impl.cu.h @@ -381,7 +381,11 @@ void SparseBlas<phi::GPUContext>::SPMV(bool transa, &beta, out_descriptor.descriptor(), gpu_type, +#if CUDA_VERSION >= 11040 + CUSPARSE_SPMV_ALG_DEFAULT, +#else CUSPARSE_MV_ALG_DEFAULT, +#endif &buffer_size); }); @@ -399,7 +403,11 @@ void SparseBlas<phi::GPUContext>::SPMV(bool transa, &beta, out_descriptor.descriptor(), gpu_type, +#if CUDA_VERSION >= 11040 + CUSPARSE_SPMV_ALG_DEFAULT, +#else CUSPARSE_MV_ALG_DEFAULT, +#endif tmp_buffer_ptr); }); } diff --git a/paddle/phi/kernels/gpu/send_u_recv_kernel.cu b/paddle/phi/kernels/gpu/send_u_recv_kernel.cu index 0f000af536d..85ca46d7e07 100644 --- a/paddle/phi/kernels/gpu/send_u_recv_kernel.cu +++ b/paddle/phi/kernels/gpu/send_u_recv_kernel.cu @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/send_u_recv_kernel.h" #include <thrust/device_vector.h> +#include <thrust/execution_policy.h> #include <thrust/fill.h> #include <algorithm> diff --git a/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu b/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu index aaae915f9df..834a93d629d 100644 --- a/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu +++ b/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/send_ue_recv_kernel.h" #include <thrust/device_vector.h> +#include <thrust/execution_policy.h> #include <thrust/fill.h> #include <algorithm> #include <vector> diff --git 
a/paddle/phi/kernels/gpu/unique_kernel.cu b/paddle/phi/kernels/gpu/unique_kernel.cu index 316fe1fae71..d420c8f438b 100644 --- a/paddle/phi/kernels/gpu/unique_kernel.cu +++ b/paddle/phi/kernels/gpu/unique_kernel.cu @@ -20,6 +20,7 @@ #include <thrust/functional.h> #include <thrust/scatter.h> #include <thrust/sequence.h> +#include <thrust/sort.h> #include <thrust/unique.h> #include <iostream> diff --git a/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu b/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu index d499cdf54ab..b9089dad716 100644 --- a/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu @@ -14,6 +14,9 @@ limitations under the License. */ #include "paddle/phi/kernels/sparse/coalesce_kernel.h" +#include <thrust/sort.h> +#include <thrust/unique.h> + #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" -- GitLab