提交 9ea0661a 编写于 作者: L Luo Tao

clang format off on some cuda .cc file

上级 80c68d38
......@@ -56,9 +56,14 @@ void *cublas_dso_handle = nullptr;
#define DYNAMIC_LOAD_CUBLAS_V2_WRAP(__name) DYNAMIC_LOAD_CUBLAS_WRAP(__name)
// include all needed cublas functions in HPPL
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
__macro(cublasSgemv) __macro(cublasDgemv) __macro(cublasSgemm) \
__macro(cublasDgemm) __macro(cublasSgeam) __macro(cublasDgeam)
// clang-format off
// Applies __macro once to each cuBLAS routine name listed below, in order
// (the S and D variants of gemv, gemm and geam).
// NOTE: the final entry must not end with a line-continuation backslash;
// a trailing backslash would splice the next source line into this macro's
// replacement list.
#define CUBLAS_BLAS_ROUTINE_EACH(__macro) \
  __macro(cublasSgemv)                    \
  __macro(cublasDgemv)                    \
  __macro(cublasSgemm)                    \
  __macro(cublasDgemm)                    \
  __macro(cublasSgeam)                    \
  __macro(cublasDgeam)
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasCreate)
DYNAMIC_LOAD_CUBLAS_V2_WRAP(cublasDestroy)
......@@ -81,6 +86,7 @@ CUBLAS_BLAS_ROUTINE_EACH(DYNAMIC_LOAD_CUBLAS_V2_WRAP)
} /* namespace dynload */
// clang-format on
#ifndef PADDLE_TYPE_DOUBLE
#define CUBLAS_GEAM dynload::cublasSgeam
#define CUBLAS_GEMV dynload::cublasSgemv
......
......@@ -57,10 +57,14 @@ void *curand_dso_handle = nullptr;
#endif
/* include all needed curand functions in HPPL */
#define CURAND_RAND_ROUTINE_EACH(__macro) \
__macro(curandCreateGenerator) __macro(curandSetStream) \
__macro(curandSetPseudoRandomGeneratorSeed) \
__macro(curandGenerateUniform) __macro(curandGenerateUniformDouble)
// clang-format off
/*
 * Expands __macro once per cuRAND routine named below, in the listed order.
 * The last entry carries no continuation backslash, which ends the macro.
 */
#define CURAND_RAND_ROUTINE_EACH(__macro)     \
  __macro(curandCreateGenerator)              \
  __macro(curandSetStream)                    \
  __macro(curandSetPseudoRandomGeneratorSeed) \
  __macro(curandGenerateUniform)              \
  __macro(curandGenerateUniformDouble)
// clang-format on
CURAND_RAND_ROUTINE_EACH(DYNAMIC_LOAD_CURAND_WRAP)
......@@ -99,25 +103,38 @@ void *cudart_dso_handle = nullptr;
#endif
/* include all needed cuda functions in HPPL */
#define CUDA_ROUTINE_EACH(__macro) \
__macro(cudaMalloc) __macro(cudaHostAlloc) __macro(cudaFree) \
__macro(cudaFreeHost) __macro(cudaMemcpy) __macro(cudaMemset) __macro( \
cudaMemcpyAsync) __macro(cudaSetDevice) __macro(cudaGetDevice) \
__macro(cudaGetDeviceCount) __macro(cudaGetDeviceProperties) \
__macro(cudaDeviceSynchronize) __macro(cudaDeviceCanAccessPeer) \
__macro(cudaDeviceEnablePeerAccess) \
__macro(cudaStreamCreate) __macro(cudaStreamDestroy) \
__macro(cudaStreamSynchronize) __macro( \
cudaStreamWaitEvent) __macro(cudaEventCreate) \
__macro(cudaEventRecord) __macro(cudaEventQuery) \
__macro(cudaEventDestroy) __macro( \
cudaEventSynchronize) \
__macro(cudaEventElapsedTime) __macro( \
cudaSetDeviceFlags) \
__macro(cudaGetLastError) __macro( \
cudaFuncSetCacheConfig) \
__macro(cudaRuntimeGetVersion) \
__macro(cudaGetErrorString)
// clang-format off
/*
 * Expands __macro once per CUDA runtime routine named below, in the listed
 * order. The inline group labels are comments only and do not alter the
 * expansion. The last entry carries no continuation backslash.
 */
#define CUDA_ROUTINE_EACH(__macro)        \
  /* memory */                            \
  __macro(cudaMalloc)                     \
  __macro(cudaHostAlloc)                  \
  __macro(cudaFree)                       \
  __macro(cudaFreeHost)                   \
  __macro(cudaMemcpy)                     \
  __macro(cudaMemset)                     \
  __macro(cudaMemcpyAsync)                \
  /* device */                            \
  __macro(cudaSetDevice)                  \
  __macro(cudaGetDevice)                  \
  __macro(cudaGetDeviceCount)             \
  __macro(cudaGetDeviceProperties)        \
  __macro(cudaDeviceSynchronize)          \
  __macro(cudaDeviceCanAccessPeer)        \
  __macro(cudaDeviceEnablePeerAccess)     \
  /* streams */                           \
  __macro(cudaStreamCreate)               \
  __macro(cudaStreamDestroy)              \
  __macro(cudaStreamSynchronize)          \
  __macro(cudaStreamWaitEvent)            \
  /* events */                            \
  __macro(cudaEventCreate)                \
  __macro(cudaEventRecord)                \
  __macro(cudaEventQuery)                 \
  __macro(cudaEventDestroy)               \
  __macro(cudaEventSynchronize)           \
  __macro(cudaEventElapsedTime)           \
  /* miscellaneous */                     \
  __macro(cudaSetDeviceFlags)             \
  __macro(cudaGetLastError)               \
  __macro(cudaFuncSetCacheConfig)         \
  __macro(cudaRuntimeGetVersion)          \
  __macro(cudaGetErrorString)
// clang-format on
CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP)
......
......@@ -47,17 +47,20 @@ extern void *cudart_dso_handle;
} __name; /* struct DynLoad__##__name */
/* include all needed cuda functions in HPPL */
#define CUDA_ROUTINE_EACH(__macro) \
__macro(cudaLaunch, cudaError_t) __macro(cudaSetupArgument, cudaError_t) \
__macro(cudaConfigureCall, cudaError_t) \
__macro(__cudaRegisterFatBinary, void **) \
__macro(__cudaUnregisterFatBinary, void) \
__macro(__cudaRegisterFunction, void) \
__macro(__cudaRegisterVar, void) \
__macro(__cudaRegisterManagedVar, void) \
__macro(__cudaInitModule, char) \
__macro(__cudaRegisterTexture, void) \
__macro(__cudaRegisterSurface, void)
// clang-format off
/*
 * Expands __macro(name, type) once per CUDA runtime entry point listed
 * below, in the listed order.
 * NOTE(review): the second argument looks like the routine's return type;
 * confirm against the wrapper macro this list is instantiated with.
 */
#define CUDA_ROUTINE_EACH(__macro)             \
  __macro(cudaLaunch, cudaError_t)             \
  __macro(cudaSetupArgument, cudaError_t)      \
  __macro(cudaConfigureCall, cudaError_t)      \
  __macro(__cudaRegisterFatBinary, void**)     \
  __macro(__cudaUnregisterFatBinary, void)     \
  __macro(__cudaRegisterFunction, void)        \
  __macro(__cudaRegisterVar, void)             \
  __macro(__cudaRegisterManagedVar, void)      \
  __macro(__cudaInitModule, char)              \
  __macro(__cudaRegisterTexture, void)         \
  __macro(__cudaRegisterSurface, void)
// clang-format on
CUDA_ROUTINE_EACH(DYNAMIC_LOAD_CUDART_WRAP)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册