diff --git a/CMakeLists.txt b/CMakeLists.txt
index 92c866da8fc7c711fa0e983d4d31c9b0485ae760..af6a13efbde9e982578c07f19185db5f437f3a1a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,6 +17,7 @@ find_package(PythonInterp 2.7 REQUIRED)
 find_package(ZLIB REQUIRED)
 find_package(NumPy REQUIRED)
 find_package(Threads REQUIRED)
+find_package(AVX QUIET)
 find_package(Glog)
 find_package(Gflags QUIET)
 find_package(GTest)
@@ -28,7 +29,7 @@ find_program(M4_EXECUTABLE m4)
 option(WITH_DSO "Compile PaddlePaddle with dynamic linked libraries" ON)
 option(WITH_GPU "Compile PaddlePaddle with gpu" ${CUDA_FOUND})
 option(WITH_DOUBLE "Compile PaddlePaddle with double precision, otherwise use single precision" OFF)
-option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ON) # TODO(yuyang18): Check AVX is supported or not as default value
+option(WITH_AVX "Compile PaddlePaddle with avx intrinsics" ${AVX_FOUND})
 option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
 option(WITH_STYLE_CHECK "Style Check for PaddlePaddle" ${PYTHONINTERP_FOUND})
 option(WITH_RDMA "Compile PaddlePaddle with rdma support" OFF)
@@ -101,8 +102,8 @@ if(NOT WITH_TIMER)
 endif(NOT WITH_TIMER)
 
 if(WITH_AVX)
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
 else(WITH_AVX)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse3")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
diff --git a/cmake/FindAVX.cmake b/cmake/FindAVX.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..e0f1b7bff5162a1a472536392adebce6eb2741e6
--- /dev/null
+++ b/cmake/FindAVX.cmake
@@ -0,0 +1,78 @@
+# This file is used to check all support levels of AVX on your machine
+# so that PaddlePaddle can unleash the vectorization power of multicore.
+#
+# Results:
+#   AVX_FOUND - TRUE if an AVX or AVX2 test program compiles and runs
+#               on this machine; unset otherwise.
+#   AVX_FLAGS - compiler flags for every detected AVX level; unset if
+#               none was detected.
+
+INCLUDE(CheckCXXSourceRuns)
+
+# Clear any stale normal variables before probing.
+SET(FIND_AVX_10)
+SET(FIND_AVX_20)
+SET(AVX_FLAGS)
+SET(AVX_FOUND)
+
+# Check AVX 2
+SET(CMAKE_REQUIRED_FLAGS)
+IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    SET(CMAKE_REQUIRED_FLAGS "-mavx2")
+ELSEIF(MSVC AND NOT CMAKE_CL_64)  # reserve for WINDOWS
+    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX2")
+ENDIF()
+
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+    __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
+    __m256i result = _mm256_abs_epi32 (a);
+    return 0;
+}" FIND_AVX_20)
+
+# Check AVX
+SET(CMAKE_REQUIRED_FLAGS)
+IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    SET(CMAKE_REQUIRED_FLAGS "-mavx")
+ELSEIF(MSVC AND NOT CMAKE_CL_64)
+    SET(CMAKE_REQUIRED_FLAGS "/arch:AVX")
+ENDIF()
+
+CHECK_CXX_SOURCE_RUNS("
+#include <immintrin.h>
+int main()
+{
+    __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
+    __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
+    __m256 result = _mm256_add_ps (a, b);
+    return 0;
+}" FIND_AVX_10)
+
+# Do not leak the probe flags into checks performed after this module.
+SET(CMAKE_REQUIRED_FLAGS)
+
+# Test the result variables by name: a result may be empty when a check
+# fails, and expanding it with ${} would leave IF() with a malformed
+# expression such as "IF(1 OR )".
+IF(FIND_AVX_20)
+    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+        SET(AVX_FLAGS "${AVX_FLAGS} -mavx2")
+    ELSEIF(MSVC)
+        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX2")
+    ENDIF()
+ENDIF()
+
+IF(FIND_AVX_10)
+    IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+        SET(AVX_FLAGS "${AVX_FLAGS} -mavx")
+    ELSEIF(MSVC)
+        SET(AVX_FLAGS "${AVX_FLAGS} /arch:AVX")
+    ENDIF()
+ENDIF()
+
+IF(FIND_AVX_10 OR FIND_AVX_20)
+    SET(AVX_FOUND TRUE)
+    MESSAGE(STATUS "Find CPU supports ${AVX_FLAGS}.")
+ENDIF()
diff --git a/doc/build/build_from_source.md b/doc/build/build_from_source.md
index a6090d68191625d86d8103c9cf96832535cbcf73..948f84abae86f95e568327082d5d0386df59d67b 100644
--- a/doc/build/build_from_source.md
+++ b/doc/build/build_from_source.md
@@ -11,11 +11,12 @@
You can download PaddlePaddle from the [github source](https://github.com/gangli ```bash git clone https://github.com/baidu/Paddle paddle +cd paddle ``` ## Requirements -To compile the source code, your computer must be equipped with GCC >=4.6 or Clang Compiler. +To compile the source code, your computer must be equipped with GCC >=4.6 or Clang compiler. ### Dependencies - **CMake**: version >= 2.8 @@ -27,17 +28,17 @@ To compile the source code, your computer must be equipped with GCC >=4.6 or Cla PaddlePaddle supports some build options. To enable it, first you need to install the related libraries. - Optional | Description - ------------ | :----------- - **WITH_GPU** | Compile with GPU mode. - **WITH_DOUBLE** | Compile with double precision floating-point, default: single precision. | - **WITH_GLOG** | Compile with glog. If not found, default: an internal log implementation. - **WITH_GFLAGS** | Compile with gflags. If not found, default: an internal flag implementation. - **WITH_TESTING** | Compile with gtest for PaddlePaddle's unit testing. - **WITH_DOC** | Compile to generate PaddlePaddle's docs, default: disabled (OFF). - **WITH_SWIG_PY** | Compile with python predict API, default: disabled (OFF). - **WITH_STYLE_CHECK**| Compile with code style check, default: enabled (ON). -| + +| Optional | Description | +| -------------------- | :--------------------------------------------------------------------------- | +| **WITH_GPU** | Compile with GPU mode. | +| **WITH_DOUBLE** | Compile with double precision floating-point, default: single precision. | +| **WITH_GLOG** | Compile with glog. If not found, default: an internal log implementation. | +| **WITH_GFLAGS** | Compile with gflags. If not found, default: an internal flag implementation. | +| **WITH_TESTING** | Compile with gtest for PaddlePaddle's unit testing. | +| **WITH_DOC** | Compile to generate PaddlePaddle's docs, default: disabled (OFF). 
| +| **WITH_SWIG_PY** | Compile with python predict API, default: disabled (OFF). | +| **WITH_STYLE_CHECK** | Compile with code style check, default: enabled (ON). | **Note:** - The GPU version works best with Cuda Toolkit 7.5 and cuDNN v5. @@ -118,11 +119,10 @@ As a simple example, consider the following: sudo tar -xzf cudnn-7.5-linux-x64-v5.1.tgz -C /usr/local sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn* ``` - Then you need to set LD\_LIBRARY\_PATH, CUDA\_HOME and PATH environment variables in ~/.bashrc. + Then you need to set LD\_LIBRARY\_PATH and PATH environment variables in ~/.bashrc. ```bash export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH - export CUDA_HOME=/usr/local/cuda export PATH=/usr/local/cuda/bin:$PATH ``` @@ -158,13 +158,12 @@ As a simple example, consider the following: cmake .. -DWITH_GPU=ON -DWITH_DOC=ON -DWITH_SWIG_PY=ON ``` -Finally, you can download source code and build: +Finally, you can build PaddlePaddle: ```bash # you can add build option here, such as: cmake .. -DWITH_GPU=ON -DWITH_DOC=OFF -DCMAKE_INSTALL_PREFIX=<path to install> -# please use sudo make install, if you want -# to install PaddlePaddle into the system +# please use sudo make install, if you want to install PaddlePaddle into the system make -j `nproc` && make install # set PaddlePaddle installation path in ~/.bashrc export PATH=<path to install>/bin:$PATH @@ -240,7 +239,7 @@ easy_install pip sudo tar -xzf cudnn-7.5-osx-x64-v5.0-ga.tgz -C /usr/local sudo chmod a+r /usr/local/cuda/include/cudnn.h /usr/local/cuda/lib64/libcudnn* ``` - 2. Then you need to set DYLD\_LIBRARY\_PATH, CUDA\_HOME and PATH environment variables in ~/.bashrc. + 2. Then you need to set DYLD\_LIBRARY\_PATH and PATH environment variables in ~/.bashrc. 
```bash export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH @@ -289,8 +288,6 @@ make -j `nproc` && make install # set PaddlePaddle installation path in ~/.bashrc export PATH=<path to install>/bin:$PATH ``` - - **Note:** If you set `WITH_SWIG_PY=ON`, related python dependencies also need to be installed.