diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index a3a62c6b2ae2b939aa015a909b44fa492e6e5fb1..a8a4d34fe5f01a4f0fedf2b9c2a09d7c4383bd25 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -35,7 +35,7 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 ) -if (${CUDA_VERSION} GREATER_EQUAL 10.0) +if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0)) find_library(CUBLAS_LIBRARY NAMES libcublas.so PATHS ${CUDNN_CHECK_LIBRARY_DIRS} NO_DEFAULT_PATH) set(CUBLAS_LIBRARIES ${CUBLAS_LIBRARY}) else() diff --git a/lite/backends/cuda/math/scale.cu b/lite/backends/cuda/math/scale.cu index 0e51fec0f232a6ceae3d4e5a36d9c3088ae29502..806a3697a2eb19354a81056f0a7ab6272ed991a1 100644 --- a/lite/backends/cuda/math/scale.cu +++ b/lite/backends/cuda/math/scale.cu @@ -16,24 +16,12 @@ #include "lite/backends/cuda/cuda_utils.h" #include "lite/backends/cuda/math/scale.h" #include "lite/backends/cuda/math/utils.h" + namespace paddle { namespace lite { namespace cuda { namespace math { -/* -template -__global__ void scale_kernel(int num, const T* in, T* out, const float scale, -const float bias) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - if (tid < num) { -#if __CUDA_ARCH__ >= 350 - out[tid] = __ldg(in + tid) * scale + bias; -#else - out[tid] = in[tid] * scale; -#endif - } -} -*/ + #define CUDA_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ i += blockDim.x * gridDim.x) @@ -61,9 +49,6 @@ template __global__ void scale_kernel( int count, const T* in_data, T* out_data, const T scale, const T bias) { int tid = blockIdx.x * blockDim.x + threadIdx.x; - // if (tid < count){ - // out_data[tid] = scale * in_data[tid] + bias; - //} CUDA_KERNEL_LOOP(tid, count) { out_data[tid] = scale * in_data[tid] + bias; } }