Review notes for this patch (plain text ahead of the first "diff --git" header; it is not part of any hunk).

What the patch does: drops the add_paddle_culib() macro from cmake/util.cmake, sets the nvcc
flags once in the top-level CMakeLists.txt, builds paddle_gserver and paddle_math by calling
cuda_add_library() directly, trims the per-test nvcc flags in paddle/math/tests, and switches one
C++ test from paddle/common/Stat.h to paddle/utils/Stat.h.

Changes made during review (added lines only; hunk line counts are untouched):
  1. cuda_add_library() for paddle_gserver and paddle_math keeps the STATIC keyword. The removed
     macro passed STATIC, and the non-GPU branches still call add_library(... STATIC ...); without
     the keyword the GPU libraries would follow BUILD_SHARED_LIBS instead.
  2. nvcc options are appended to CUDA_NVCC_FLAGS as separate list items rather than as a single
     quoted string containing spaces. FindCUDA documents that multiple arguments must be
     semicolon-delimited, and the paddle/math/tests hunk below already uses that form.

Caveats:
  a. The patch text that was reviewed had its line breaks and indentation collapsed. Indentation,
     whitespace-only context lines and any wrapped lines below are reconstructed from the hunk
     headers and may differ from the real files; apply with whitespace tolerance and verify.
  b. The post-image hashes on the "index" lines were not recomputed after the review changes.
  c. The last hunk (test_TrainingAlgorithm.cpp) is reproduced only as far as it was visible; it
     appears to continue beyond the reviewed text.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4613155f7700b25b2a8d7c250832722085b332fa..8ce51bb0f4109f596840b4b4cadf8d01525f6dba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -98,6 +98,8 @@ else()
         add_definitions(-DPADDLE_USE_DSO)
     endif(WITH_DSO)
 
+    list(APPEND CUDA_NVCC_FLAGS -g -O3 --use_fast_math)
+
     # Include cuda and cudnn
     include_directories(${CUDNN_INCLUDE_DIR})
     include_directories(${CUDA_TOOLKIT_INCLUDE})
@@ -117,6 +119,9 @@ endif(NOT WITH_TIMER)
 if(WITH_AVX)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
+    if(AVX_FOUND)
+        list(APPEND CUDA_NVCC_FLAGS -Xcompiler -mavx)
+    endif(AVX_FOUND)
 else(WITH_AVX)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
diff --git a/cmake/util.cmake b/cmake/util.cmake
index 0fa36f070cc11be543efe9573b93173ec771b9be..74227bf14e37c8560bf5a19aeb716ff8e41cf242 100644
--- a/cmake/util.cmake
+++ b/cmake/util.cmake
@@ -178,13 +178,6 @@ macro(add_simple_unittest TARGET_NAME)
     add_unittest(${TARGET_NAME} ${TARGET_NAME}.cpp)
 endmacro()
 
-macro(add_paddle_culib TARGET_NAME)
-    set(NVCC_FLAG ${CUDA_NVCC_FLAGS})
-    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--use_fast_math)
-    cuda_add_library(${TARGET_NAME} STATIC ${ARGN})
-    set(CUDA_NVCC_FLAGS ${NVCC_FLAG})
-endmacro()
-
 
 # Creates C resources file from files in given resource file
 function(create_resources res_file output)
diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt
index 9ac4d210f6d376639df20800b6782f1f8c03d6aa..a066f80c221ee8ab4383ee6463f7b111984b58ff 100644
--- a/paddle/gserver/CMakeLists.txt
+++ b/paddle/gserver/CMakeLists.txt
@@ -50,7 +50,7 @@ if(NOT WITH_PYTHON)
 endif()
 
 if(WITH_GPU)
-    add_paddle_culib(paddle_gserver ${GSERVER_SOURCES})
+    cuda_add_library(paddle_gserver STATIC ${GSERVER_SOURCES})
 else()
     add_library(paddle_gserver STATIC
         ${GSERVER_SOURCES})
diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt
index 1bb271ba80e1aa899f967afe27d6f5b8cf71eecd..f5657c4690ca71200346efd4e2c5244c02c92eb1 100644
--- a/paddle/math/CMakeLists.txt
+++ b/paddle/math/CMakeLists.txt
@@ -25,7 +25,7 @@ if(NOT WITH_GPU)
 
     add_library(paddle_math STATIC ${MATH_SOURCES})
 else()
-    add_paddle_culib(paddle_math ${MATH_SOURCES})
+    cuda_add_library(paddle_math STATIC ${MATH_SOURCES})
 endif()
 
 
diff --git a/paddle/math/tests/CMakeLists.txt b/paddle/math/tests/CMakeLists.txt
index 07fd1adc1be9c969ca3500cd4e8ce5275b5b5956..ccc3deac7be8708c8dd19b6e3404ba43cb052401 100644
--- a/paddle/math/tests/CMakeLists.txt
+++ b/paddle/math/tests/CMakeLists.txt
@@ -15,7 +15,7 @@ add_simple_unittest(test_perturbation)
 add_simple_unittest(test_CpuGpuVector)
 add_simple_unittest(test_Allocator)
 if(COMPILER_SUPPORT_CXX11)
-    LIST(APPEND CUDA_NVCC_FLAGS -std=c++11 -Xcompiler -fPIC --use_fast_math)
+    LIST(APPEND CUDA_NVCC_FLAGS -std=c++11)
     CUDA_ADD_EXECUTABLE(test_Tensor test_Tensor.cu)
     link_paddle_test(test_Tensor)
 endif()
diff --git a/paddle/math/tests/test_TrainingAlgorithm.cpp b/paddle/math/tests/test_TrainingAlgorithm.cpp
index b465ae6c26264317515dfdab027305fa391c1834..1759d221e51de121f304ba01dcc9ee26c78e1800 100644
--- a/paddle/math/tests/test_TrainingAlgorithm.cpp
+++ b/paddle/math/tests/test_TrainingAlgorithm.cpp
@@ -81,7 +81,7 @@ int VectorCheckErr(const VectorPtr& vector1, const VectorPtr& vector2) {
 #else
-#include "paddle/common/Stat.h"
+#include "paddle/utils/Stat.h"
 #define CHECK_VECTORPTR(vector1, vector2)