diff --git a/paddle/cuda/src/hl_cuda_cublas.cc b/paddle/cuda/src/hl_cuda_cublas.cc
index f82d6c9402fe392ba3d9e55cd551ff1b052fef65..e8ba232d44b3f66254d4749d4abbcfbe46d1fd0e 100644
--- a/paddle/cuda/src/hl_cuda_cublas.cc
+++ b/paddle/cuda/src/hl_cuda_cublas.cc
@@ -56,9 +56,14 @@ void *cublas_dso_handle = nullptr;
 #define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) DYNAMIC_LOAD_CUBLAS_WRAP(__name)
 
 // include all needed cublas functions in HPPL
-#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
-  __macro(cublasSgemv) __macro(cublasDgemv) __macro(cublasSgemm) \
-      __macro(cublasDgemm) __macro(cublasSgeam) __macro(cublasDgeam)
+// clang-format off
+#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
+  __macro(cublasSgemv) \
+  __macro(cublasDgemv) \
+  __macro(cublasSgemm) \
+  __macro(cublasDgemm) \
+  __macro(cublasSgeam) \
+  __macro(cublasDgeam)
 
 DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate)
 DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy)
@@ -81,6 +86,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
 
 } /* namespace dynload */
 
+// clang-format on
 #ifndef PADDLE_TYPE_DOUBLE
 #define CUBLAS_GEAM dynload::cublasSgeam
 #define CUBLAS_GEMV dynload::cublasSgemv
diff --git a/paddle/cuda/src/hl_cuda_device.cc b/paddle/cuda/src/hl_cuda_device.cc
index 85d4860b5bff6109663c46be01081558a58093ac..745be35b56278ed2e0033d5fd2806320d3164d7c 100644
--- a/paddle/cuda/src/hl_cuda_device.cc
+++ b/paddle/cuda/src/hl_cuda_device.cc
@@ -57,10 +57,14 @@ void *curand_dso_handle = nullptr;
 #endif
 
 /* include all needed curand functions in HPPL */
-#define CURAND_RAND_ROUTINE_EACH(__macro) \
-  __macro(curandCreateGenerator) __macro(curandSetStream) \
-      __macro(curandSetPseudoRandomGeneratorSeed) \
-      __macro(curandGenerateUniform) __macro(curandGenerateUniformDouble)
+// clang-format off
+#define CURAND_RAND_ROUTINE_EACH(__macro) \
+  __macro(curandCreateGenerator) \
+  __macro(curandSetStream) \
+  __macro(curandSetPseudoRandomGeneratorSeed) \
+  __macro(curandGenerateUniform) \
+  __macro(curandGenerateUniformDouble)
+// clang-format on
 
 CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP)
 
@@ -99,25 +103,38 @@ void *cudart_dso_handle = nullptr;
 #endif
 
 /* include all needed cuda functions in HPPL */
-#define CUDA_ROUTINE_EACH(__macro) \
-  __macro(cudaMalloc) __macro(cudaHostAlloc) __macro(cudaFree) \
-      __macro(cudaFreeHost) __macro(cudaMemcpy) __macro(cudaMemset) __macro( \
-          cudaMemcpyAsync) __macro(cudaSetDevice) __macro(cudaGetDevice) \
-      __macro(cudaGetDeviceCount) __macro(cudaGetDeviceProperties) \
-      __macro(cudaDeviceSynchronize) __macro(cudaDeviceCanAccessPeer) \
-      __macro(cudaDeviceEnablePeerAccess) \
-      __macro(cudaStreamCreate) __macro(cudaStreamDestroy) \
-      __macro(cudaStreamSynchronize) __macro( \
-          cudaStreamWaitEvent) __macro(cudaEventCreate) \
-      __macro(cudaEventRecord) __macro(cudaEventQuery) \
-      __macro(cudaEventDestroy) __macro( \
-          cudaEventSynchronize) \
-      __macro(cudaEventElapsedTime) __macro( \
-          cudaSetDeviceFlags) \
-      __macro(cudaGetLastError) __macro( \
-          cudaFuncSetCacheConfig) \
-      __macro(cudaRuntimeGetVersion) \
-      __macro(cudaGetErrorString)
+// clang-format off
+#define CUDA_ROUTINE_EACH(__macro) \
+  __macro(cudaMalloc) \
+  __macro(cudaHostAlloc) \
+  __macro(cudaFree) \
+  __macro(cudaFreeHost) \
+  __macro(cudaMemcpy) \
+  __macro(cudaMemset) \
+  __macro(cudaMemcpyAsync) \
+  __macro(cudaSetDevice) \
+  __macro(cudaGetDevice) \
+  __macro(cudaGetDeviceCount) \
+  __macro(cudaGetDeviceProperties) \
+  __macro(cudaDeviceSynchronize) \
+  __macro(cudaDeviceCanAccessPeer) \
+  __macro(cudaDeviceEnablePeerAccess) \
+  __macro(cudaStreamCreate) \
+  __macro(cudaStreamDestroy) \
+  __macro(cudaStreamSynchronize) \
+  __macro(cudaStreamWaitEvent) \
+  __macro(cudaEventCreate) \
+  __macro(cudaEventRecord) \
+  __macro(cudaEventQuery) \
+  __macro(cudaEventDestroy) \
+  __macro(cudaEventSynchronize) \
+  __macro(cudaEventElapsedTime) \
+  __macro(cudaSetDeviceFlags) \
+  __macro(cudaGetLastError) \
+  __macro(cudaFuncSetCacheConfig) \
+  __macro(cudaRuntimeGetVersion) \
+  __macro(cudaGetErrorString)
+// clang-format on
 
 CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP)
 
diff --git a/paddle/cuda/src/hl_cudart_wrap.cc b/paddle/cuda/src/hl_cudart_wrap.cc
index 610b47581c90b89713f21d8eca6d86ac85ff647c..ff6b830b7addc5c87af0d55070260c279a046a75 100644
--- a/paddle/cuda/src/hl_cudart_wrap.cc
+++ b/paddle/cuda/src/hl_cudart_wrap.cc
@@ -47,17 +47,20 @@ extern void *cudart_dso_handle;
 } __name; /* struct DynLoad__##__name */
 
 /* include all needed cuda functions in HPPL */
-#define CUDA_ROUTINE_EACH(__macro) \
-  __macro(cudaLaunch, cudaError_t) __macro(cudaSetupArgument, cudaError_t) \
-      __macro(cudaConfigureCall, cudaError_t) \
-      __macro(__cudaRegisterFatBinary, void **) \
-      __macro(__cudaUnregisterFatBinary, void) \
-      __macro(__cudaRegisterFunction, void) \
-      __macro(__cudaRegisterVar, void) \
-      __macro(__cudaRegisterManagedVar, void) \
-      __macro(__cudaInitModule, char) \
-      __macro(__cudaRegisterTexture, void) \
-      __macro(__cudaRegisterSurface, void)
+// clang-format off
+#define CUDA_ROUTINE_EACH(__macro) \
+  __macro(cudaLaunch, cudaError_t) \
+  __macro(cudaSetupArgument, cudaError_t) \
+  __macro(cudaConfigureCall, cudaError_t) \
+  __macro(__cudaRegisterFatBinary, void**) \
+  __macro(__cudaUnregisterFatBinary, void) \
+  __macro(__cudaRegisterFunction, void) \
+  __macro(__cudaRegisterVar, void) \
+  __macro(__cudaRegisterManagedVar, void) \
+  __macro(__cudaInitModule, char) \
+  __macro(__cudaRegisterTexture, void) \
+  __macro(__cudaRegisterSurface, void)
+// clang-format on
 
 CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP)
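
Note: the macros touched here are X-macro lists that only enumerate routine names; the actual loading is done by the DYNAMIC_LOAD_*_WRAP macros defined earlier in each file, which resolve every symbol from the CUDA shared libraries at runtime (via the per-library handles such as cublas_dso_handle and curand_dso_handle visible in the context above) instead of linking them at build time. The sketch below illustrates that pattern in a simplified, self-contained form. It is an assumption for illustration only: the names SKETCH_DYNLOAD_WRAP and SKETCH_ROUTINE_EACH, the hard-coded "libcublas.so" path, and the bare dlopen/dlsym calls are not Paddle's actual implementation, which shares one dso handle per library and adds error handling.

// Simplified sketch of the dynamic-load wrapper pattern (not Paddle's code).
// Build on a POSIX system with -ldl; requires the cuBLAS headers.
#include <dlfcn.h>      // dlopen, dlsym
#include <cublas_v2.h>  // cublasStatus_t, cublasSgemm, cublasDgemm

namespace dynload {

// One struct and one callable object per routine; the symbol is looked up
// lazily on first call and cached in function-local statics.
#define SKETCH_DYNLOAD_WRAP(__name)                          \
  struct DynLoad__##__name {                                 \
    template <typename... Args>                              \
    cublasStatus_t operator()(Args... args) {                \
      typedef cublasStatus_t (*FuncType)(Args...);           \
      static void *dso = dlopen("libcublas.so", RTLD_LAZY);  \
      static void *sym = dlsym(dso, #__name);                \
      return reinterpret_cast<FuncType>(sym)(args...);       \
    }                                                        \
  } __name;

// The list macro stamps out one wrapper per routine, so callers can write
// dynload::cublasSgemm(...) without a link-time dependency on libcublas.
#define SKETCH_ROUTINE_EACH(__macro) \
  __macro(cublasSgemm)               \
  __macro(cublasDgemm)

SKETCH_ROUTINE_EACH(SKETCH_DYNLOAD_WRAP)

}  // namespace dynload

Keeping each __macro(...) entry on its own line inside // clang-format off, as this diff does, is what makes such lists easy to extend and review; clang-format's default wrapping (the removed lines) merges and re-splits the entries unpredictably.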