提交 317bd91c 编写于 作者：myq406450149

rm GREATER_EQUAL

上级 f904623c
...@@ -35,7 +35,7 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS ...@@ -35,7 +35,7 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
) )
if (${CUDA_VERSION} GREATER_EQUAL 10.0) if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0))
find_library(CUBLAS_LIBRARY NAMES libcublas.so PATHS ${CUDNN_CHECK_LIBRARY_DIRS} NO_DEFAULT_PATH) find_library(CUBLAS_LIBRARY NAMES libcublas.so PATHS ${CUDNN_CHECK_LIBRARY_DIRS} NO_DEFAULT_PATH)
set(CUBLAS_LIBRARIES ${CUBLAS_LIBRARY}) set(CUBLAS_LIBRARIES ${CUBLAS_LIBRARY})
else() else()
......
...@@ -16,24 +16,12 @@ ...@@ -16,24 +16,12 @@
#include "lite/backends/cuda/cuda_utils.h" #include "lite/backends/cuda/cuda_utils.h"
#include "lite/backends/cuda/math/scale.h" #include "lite/backends/cuda/math/scale.h"
#include "lite/backends/cuda/math/utils.h" #include "lite/backends/cuda/math/utils.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
namespace cuda { namespace cuda {
namespace math { namespace math {
/*
template <typename T>
__global__ void scale_kernel(int num, const T* in, T* out, const float scale,
const float bias) {
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid < num) {
#if __CUDA_ARCH__ >= 350
out[tid] = __ldg(in + tid) * scale + bias;
#else
out[tid] = in[tid] * scale;
#endif
}
}
*/
// Grid-stride loop helper: iterates `i` over [0, n) using every thread in the
// grid, so a kernel is correct for any <<<grid, block>>> launch configuration
// (including grids smaller than `n`). Declares `i` as the loop variable.
#define CUDA_KERNEL_LOOP(i, n)                                 \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)
// Elementwise affine transform: out_data[i] = scale * in_data[i] + bias
// for i in [0, count).
//
// Uses a grid-stride loop (CUDA_KERNEL_LOOP), so any launch configuration
// covers all `count` elements; no additional bounds check is needed.
// `in_data` and `out_data` are device pointers with at least `count`
// elements each.
template <typename T>
__global__ void scale_kernel(
    int count, const T* in_data, T* out_data, const T scale, const T bias) {
  // NOTE: the previous standalone `int tid = blockIdx.x * blockDim.x +
  // threadIdx.x;` was dead code — CUDA_KERNEL_LOOP declares its own `tid`
  // that shadowed it — so it has been removed.
  CUDA_KERNEL_LOOP(tid, count) { out_data[tid] = scale * in_data[tid] + bias; }
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册