From 5a6d7fe2ff6c946b2d9fe7816a9ee8a321c1b9fa Mon Sep 17 00:00:00 2001 From: peizhilin Date: Tue, 18 Dec 2018 11:36:42 +0800 Subject: [PATCH] add mkl,ctc support for windows --- CMakeLists.txt | 12 +-- cmake/cuda.cmake | 3 + cmake/cudnn.cmake | 1 + cmake/external/cub.cmake | 2 +- cmake/external/dlpack.cmake | 2 +- cmake/external/mkldnn.cmake | 43 +++++++--- cmake/external/mklml.cmake | 83 +++++++++++-------- cmake/external/python.cmake | 8 +- cmake/external/warpctc.cmake | 30 +++++-- cmake/external/xbyak.cmake | 4 +- cmake/generic.cmake | 6 +- cmake/inference_lib.cmake | 16 ++-- cmake/operators.cmake | 2 +- cmake/simd.cmake | 73 ++++++++-------- paddle/fluid/framework/CMakeLists.txt | 3 +- .../framework/details/all_reduce_op_handle.cc | 2 +- paddle/fluid/framework/mixed_vector.h | 10 +-- paddle/fluid/framework/op_registry.h | 3 +- .../inference/api/demo_ci/CMakeLists.txt | 15 +++- .../fluid/memory/detail/system_allocator.cc | 1 - paddle/fluid/operators/CMakeLists.txt | 7 +- paddle/fluid/operators/cum_op.h | 2 + .../elementwise/elementwise_mul_mkldnn_op.cc | 3 + .../operators/math/detail/lstm_cpu_kernel.h | 6 ++ paddle/fluid/operators/math/jit_gen.h | 3 + paddle/fluid/platform/cpu_info.cc | 7 +- paddle/fluid/platform/dynload/CMakeLists.txt | 2 - paddle/fluid/platform/dynload/cudnn.cc | 4 + paddle/fluid/platform/dynload/cudnn.h | 2 +- .../fluid/platform/dynload/dynamic_loader.cc | 16 ++++ .../fluid/platform/dynload/dynamic_loader.h | 6 ++ paddle/fluid/platform/dynload/mklml.h | 2 +- paddle/fluid/platform/dynload/tensorrt.h | 2 +- paddle/fluid/platform/dynload/warpctc.h | 2 +- paddle/fluid/platform/port.h | 5 +- paddle/fluid/train/demo/CMakeLists.txt | 18 +++- python/CMakeLists.txt | 16 ++-- python/paddle/fluid/__init__.py | 9 +- python/paddle/fluid/framework.py | 18 ++-- python/setup.py.in | 38 +++++---- 40 files changed, 315 insertions(+), 172 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 653ae4ffe53..efdb451f659 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,16 +125,12 @@ if(ANDROID OR IOS) add_definitions(-DPADDLE_MOBILE_INFERENCE) endif() -if (APPLE OR WIN32) +if (APPLE) set(WITH_MKL OFF CACHE STRING - "Disable MKL for building on mac and windows" FORCE) + "Disable MKL for building on mac" FORCE) endif() if (WIN32) - set(WITH_DSO OFF CACHE STRING - "Disable DSO when compiling for Windows" FORCE) - set(WITH_MKL OFF CACHE STRING - "Disable MKL when compiling for Windows" FORCE) set(WITH_DISTRIBUTE OFF CACHE STRING "Disable DISTRIBUTE when compiling for Windows" FORCE) set(WITH_C_API OFF CACHE STRING @@ -207,10 +203,10 @@ include(external/xxhash) # download xxhash include(external/dlpack) include(external/snappy) # download snappy include(external/snappystream) # download snappystream +include(external/warpctc) # download, build, install warpctc if (NOT WIN32) -# there is no official support of warpctc, nccl, cupti in windows -include(external/warpctc) # download, build, install warpctc +# there is no official support of nccl, cupti in windows include(cupti) include(external/gzstream) endif (NOT WIN32) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 414e92eb27f..5be7be64137 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -139,10 +139,12 @@ endfunction() message(STATUS "CUDA detected: " ${CUDA_VERSION}) if (${CUDA_VERSION} LESS 7.0) set(paddle_known_gpu_archs ${paddle_known_gpu_archs}) + add_definitions("-DPADDLE_CUDA_BINVER=\"60\"") elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x set(paddle_known_gpu_archs ${paddle_known_gpu_archs7}) list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") + add_definitions("-DPADDLE_CUDA_BINVER=\"70\"") elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x set(paddle_known_gpu_archs ${paddle_known_gpu_archs8}) list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") @@ -150,6 +152,7 @@ elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x # CUDA 8 may complain that sm_20 is no longer supported. Suppress the # warning for now. list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets") + add_definitions("-DPADDLE_CUDA_BINVER=\"80\"") endif() include_directories(${CUDA_INCLUDE_DIRS}) diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index 09bec347dbd..96a9917e762 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -89,6 +89,7 @@ if(CUDNN_FOUND) if(NOT CUDNN_MAJOR_VERSION) set(CUDNN_VERSION "???") else() + add_definitions("-DPADDLE_CUDNN_BINVER=\"${CUDNN_MAJOR_VERSION}\"") math(EXPR CUDNN_VERSION "${CUDNN_MAJOR_VERSION} * 1000 + ${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}") diff --git a/cmake/external/cub.cmake b/cmake/external/cub.cmake index c94849cf4b9..f06728de91e 100644 --- a/cmake/external/cub.cmake +++ b/cmake/external/cub.cmake @@ -32,4 +32,4 @@ endif() add_dependencies(cub extern_cub) -LIST(APPEND externl_project_dependencies cub) +LIST(APPEND external_project_dependencies cub) diff --git a/cmake/external/dlpack.cmake b/cmake/external/dlpack.cmake index 94d8fcc6685..4587475d790 100644 --- a/cmake/external/dlpack.cmake +++ b/cmake/external/dlpack.cmake @@ -28,4 +28,4 @@ endif() add_dependencies(dlpack extern_dlpack) -LIST(APPEND externl_project_dependencies dlpack) +LIST(APPEND external_project_dependencies dlpack) diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index b280db23b9b..c29375cd058 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -23,15 +23,14 @@ SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn) SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn) SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE) -IF(WIN32 OR APPLE) +IF(APPLE) MESSAGE(WARNING - "Windows or Mac is not supported with MKLDNN in Paddle yet." + "Mac is not supported with MKLDNN in Paddle yet." "Force WITH_MKLDNN=OFF") - SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in Windows and MacOS" FORCE) + SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in MacOS" FORCE) return() ENDIF() -SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE) MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path") SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib") @@ -44,10 +43,14 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML") ELSE() MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN") ENDIF() -SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds") -SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value") -SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}") -SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}") + +IF(NOT WIN32) + SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds") + SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value") + SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}") + SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}") +ENDIF(NOT WIN32) + ExternalProject_Add( ${MKLDNN_PROJECT} ${EXTERNAL_PROJECT_LOG_ARGS} @@ -58,8 +61,15 @@ ExternalProject_Add( UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + CMAKE_ARGS -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + CMAKE_ARGS -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + CMAKE_ARGS -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + CMAKE_ARGS -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON CMAKE_ARGS -DMKLROOT=${MKLML_ROOT} CMAKE_ARGS -DCMAKE_C_FLAGS=${MKLDNN_CFLAG} CMAKE_ARGS -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG} @@ -67,6 +77,11 @@ ExternalProject_Add( CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR} -DMKLROOT:PATH=${MKLML_ROOT} ) +if(WIN32) + SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE) +else(WIN32) + SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE) +endif(WIN32) ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB}) @@ -85,10 +100,14 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT}) # copy the real so.0 lib to install dir # it can be directly contained in wheel or capi -SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0) -ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB} - COMMAND cp ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB} - DEPENDS mkldnn) +if(WIN32) + SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/lib/mkldnn.dll) +else(WIN32) + SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0) + ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB} + COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB} + DEPENDS mkldnn) +endif(WIN32) ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB}) IF(WITH_C_API) diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake index dc5427acd45..3da552e3190 100644 --- a/cmake/external/mklml.cmake +++ b/cmake/external/mklml.cmake @@ -16,59 +16,76 @@ IF(NOT ${WITH_MKLML}) return() ENDIF(NOT ${WITH_MKLML}) -IF(WIN32 OR APPLE) +IF(APPLE) MESSAGE(WARNING - "Windows or Mac is not supported with MKLML in Paddle yet." + "Mac is not supported with MKLML in Paddle yet." "Force WITH_MKLML=OFF") SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE) return() ENDIF() INCLUDE(ExternalProject) - -SET(MKLML_PROJECT "extern_mklml") -IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL)) - MESSAGE(STATUS "use pre defined download url") - SET(MKLML_VER "mklml_lnx_2019.0.20180710" CACHE STRING "" FORCE) - SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE) -ENDIF() -MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}") -SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") -SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") SET(MKLML_DST_DIR "mklml") SET(MKLML_INSTALL_ROOT "${THIRD_PARTY_PATH}/install") SET(MKLML_INSTALL_DIR ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR}) SET(MKLML_ROOT ${MKLML_INSTALL_DIR}) SET(MKLML_INC_DIR ${MKLML_ROOT}/include) SET(MKLML_LIB_DIR ${MKLML_ROOT}/lib) -SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) -SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) +if(WIN32) + SET(MKLML_LIB ${MKLML_LIB_DIR}/mklml.lib) + SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.lib) + SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/mklml.dll) + SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll) +else() + SET(MKLML_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) + SET(MKLML_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) + SET(MKLML_SHARED_LIB ${MKLML_LIB_DIR}/libmklml_intel.so) + SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so) +endif() SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib") -INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) +if(WIN32) + MESSAGE(WARNING + "Please download the MKLML and and put it at " ${THIRD_PARTY_PATH}/install/mklml) +else() + SET(MKLML_PROJECT "extern_mklml") + IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL)) + MESSAGE(STATUS "use pre defined download url") + SET(MKLML_VER "mklml_lnx_2019.0.20180710" CACHE STRING "" FORCE) + SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE) + ENDIF() + MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}") + SET(MKLML_SOURCE_DIR "${THIRD_PARTY_PATH}/mklml") + SET(MKLML_DOWNLOAD_DIR "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}") -FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt - "PROJECT(MKLML)\n" - "cmake_minimum_required(VERSION 3.0)\n" - "install(DIRECTORY ${MKLML_VER}/include ${MKLML_VER}/lib \n" - " DESTINATION ${MKLML_DST_DIR})\n") + FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt + "PROJECT(MKLML)\n" + "cmake_minimum_required(VERSION 3.0)\n" + "install(DIRECTORY ${MKLML_VER}/include ${MKLML_VER}/lib \n" + " DESTINATION ${MKLML_DST_DIR})\n") -ExternalProject_Add( - ${MKLML_PROJECT} - ${EXTERNAL_PROJECT_LOG_ARGS} - PREFIX ${MKLML_SOURCE_DIR} - DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} - DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz - && tar zxf ${MKLML_VER}.tgz - DOWNLOAD_NO_PROGRESS 1 - UPDATE_COMMAND "" - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} -) + ExternalProject_Add( + ${MKLML_PROJECT} + ${EXTERNAL_PROJECT_LOG_ARGS} + PREFIX ${MKLML_SOURCE_DIR} + DOWNLOAD_DIR ${MKLML_DOWNLOAD_DIR} + DOWNLOAD_COMMAND wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz + && tar zxf ${MKLML_VER}.tgz + DOWNLOAD_NO_PROGRESS 1 + UPDATE_COMMAND "" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT} + ) +endif() + + +INCLUDE_DIRECTORIES(${MKLML_INC_DIR}) ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL) SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB}) -ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) +if(NOT WIN32) + ADD_DEPENDENCIES(mklml ${MKLML_PROJECT}) +endif() LIST(APPEND external_project_dependencies mklml) IF(WITH_C_API) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index a3599dd798c..edfb655541f 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -23,9 +23,12 @@ FIND_PACKAGE(PythonLibs ${PY_VERSION}) if(WIN32) execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" -"from distutils import sysconfig as s;import sys;import struct; +"from distutils import sysconfig as s;import sys;import struct;import sysconfig; print(sys.prefix); print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION')); +print(sysconfig.get_platform()); +print(sysconfig.get_config_var('py_version_nodot')); +print(sysconfig.get_config_var('SOABI')); " RESULT_VARIABLE _PYTHON_SUCCESS OUTPUT_VARIABLE _PYTHON_VALUES @@ -41,6 +44,9 @@ print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION')); string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES}) list(GET _PYTHON_VALUES 0 PYTHON_PREFIX) list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX) + list(GET _PYTHON_VALUES 2 SYS_PLATFORM) + list(GET _PYTHON_VALUES 3 PYTHON_SHORT_VERSION_NODOT) + list(GET _PYTHON_VALUES 4 PYTHON_SOABI) # Make sure all directory separators are '/' string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX}) diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 07e1137e16a..7b937c93feb 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" # Used in unit test test_WarpCTCLayer SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib" CACHE PATH "Warp-ctc Library Directory" FORCE) -SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" - CACHE FILEPATH "Warp-ctc Library" FORCE) -IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" ) +IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR WIN32) SET(USE_OMP OFF) ELSE() SET(USE_OMP ON) ENDIF() +IF(WIN32) + SET(WARPCTC_REPOSITORY "https://github.com/wopeizl/warp-ctc.git") +ELSE() + SET(WARPCTC_REPOSITORY "https://github.com/dzhwinter/warp-ctc.git") +ENDIF() + ExternalProject_Add( extern_warpctc ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/dzhwinter/warp-ctc.git" + GIT_REPOSITORY ${WARPCTC_REPOSITORY} PREFIX ${WARPCTC_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR} -DWITH_GPU=${WITH_GPU} -DWITH_OMP=${USE_OMP} @@ -59,6 +67,18 @@ ExternalProject_Add( -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR} ) +IF(WIN32) + IF(NOT EXISTS "${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}") + add_custom_command(TARGET extern_warpctc POST_BUILD + COMMAND cmake -E copy ${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX} ${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX} + ) + ENDIF() + SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" + CACHE FILEPATH "Warp-ctc Library" FORCE) +else(WIN32) + SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}" + CACHE FILEPATH "Warp-ctc Library" FORCE) +ENDIF(WIN32) MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}") INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR}) # For warpctc code to include its headers. diff --git a/cmake/external/xbyak.cmake b/cmake/external/xbyak.cmake index 384c2f93282..42e39fb8134 100644 --- a/cmake/external/xbyak.cmake +++ b/cmake/external/xbyak.cmake @@ -13,8 +13,8 @@ # limitations under the License. set(WITH_XBYAK ON) -if(WIN32 OR APPLE) - SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE) +if(APPLE) + SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in MacOS" FORCE) return() endif() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index a8b9dcfcf5e..c6fe2e970d3 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -267,7 +267,11 @@ function(cc_library TARGET_NAME) list(APPEND cc_library_DEPS dynload_mklml) endif() add_dependencies(${TARGET_NAME} mklml) - target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") + if(WIN32) + target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB}) + else(WIN32) + target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") + endif(WIN32) endif() # remove link to python, see notes at: # https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 5aa7a8a752f..a5b70b3c33f 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -115,20 +115,20 @@ if (NOT PROTOBUF_FOUND OR WIN32) ) endif () -if (NOT CBLAS_FOUND) - set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas") - copy(openblas_lib - SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include - DSTS ${dst_dir} ${dst_dir} - DEPS extern_openblas - ) -elseif (WITH_MKLML) +if (WITH_MKLML) set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mklml") copy(mklml_lib SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR} DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir} DEPS mklml ) +elseif (NOT CBLAS_FOUND OR WIN32) + set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas") + copy(openblas_lib + SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include + DSTS ${dst_dir} ${dst_dir} + DEPS extern_openblas + ) endif () if (WITH_MKLDNN) diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 2ced43f9e6c..70d159b4f35 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -84,7 +84,7 @@ function(op_library TARGET) endif() if (WIN32) # remove windows unsupported op, because windows has no nccl, no warpctc such ops. - foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op") + foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op") if ("${TARGET}" STREQUAL "${windows_unsupport_op}") return() endif() diff --git a/cmake/simd.cmake b/cmake/simd.cmake index 86096d4feaa..566dc75fda0 100644 --- a/cmake/simd.cmake +++ b/cmake/simd.cmake @@ -57,46 +57,43 @@ int main() return 0; }" SSE3_FOUND) -# disable AVX by default on windows -if(NOT WIN32) - # Check AVX - set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG}) - set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) - CHECK_CXX_SOURCE_RUNS(" - #include - int main() - { - __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f); - __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); - __m256 result = _mm256_add_ps (a, b); - return 0; - }" AVX_FOUND) +# Check AVX +set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG}) +set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) +CHECK_CXX_SOURCE_RUNS(" +#include +int main() +{ + __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f); + __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f); + __m256 result = _mm256_add_ps (a, b); + return 0; +}" AVX_FOUND) - # Check AVX 2 - set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG}) - set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) - CHECK_CXX_SOURCE_RUNS(" - #include - int main() - { - __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); - __m256i result = _mm256_abs_epi32 (a); - return 0; - }" AVX2_FOUND) +# Check AVX 2 +set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG}) +set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) +CHECK_CXX_SOURCE_RUNS(" +#include +int main() +{ + __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4); + __m256i result = _mm256_abs_epi32 (a); + return 0; +}" AVX2_FOUND) - # Check AVX512F - set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG}) - set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) - CHECK_CXX_SOURCE_RUNS(" - #include - int main() - { - __m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4, - 13, -5, 6, -7, 9, 2, -6, 3); - __m512i result = _mm512_abs_epi32 (a); - return 0; - }" AVX512F_FOUND) -endif(NOT WIN32) +# Check AVX512F +set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG}) +set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE) +CHECK_CXX_SOURCE_RUNS(" +#include +int main() +{ + __m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4, + 13, -5, 6, -7, 9, 2, -6, 3); + __m512i result = _mm512_abs_epi32 (a); + return 0; +}" AVX512F_FOUND) set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED}) mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 225dfb3e700..90083f690fe 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -15,8 +15,7 @@ function(windows_symbolic TARGET) file(GENERATE OUTPUT ${final_path}/.${src}.cu INPUT ${final_path}/${src}.cc) add_custom_command(OUTPUT ${final_path}/.${src}.cu - COMMAND ${CMAKE_COMMAND} -E remove ${final_path}/.${src}.cu - COMMAND ${CMAKE_COMMAND} -E copy "${final_path}/${src}.cc" "${final_path}/.${src}.cu" + COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu" COMMENT "create hidden file of ${src}.cu") add_custom_target(${TARGET} ALL DEPENDS .${src}.cu) endforeach() diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 9eaff1f5601..de7c845884d 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -50,7 +50,7 @@ void AllReduceOpHandle::RunImpl() { // FIXME(typhoonzero): If scope0(global scope) have NCCL_ID_VAR, // this is a distributed or inter-process call, find a better way. -#ifdef PADDLE_WITH_CUDA +#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32) if (NoDummyInputSize() == 1 && local_scopes_[0]->FindLocalVar(NCCL_ID_VARNAME) == nullptr) { #else diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/fluid/framework/mixed_vector.h index 6940250c3f9..c3a044d22cf 100644 --- a/paddle/fluid/framework/mixed_vector.h +++ b/paddle/fluid/framework/mixed_vector.h @@ -215,8 +215,8 @@ class Vector { auto stream = dev_ctx->stream(); void *src = gpu_->ptr(); void *dst = cpu_.data(); - memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src, - gpu_->size(), stream); + paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src, + gpu_->size(), stream); dev_ctx->Wait(); } @@ -261,8 +261,8 @@ class Vector { auto *dev_ctx = static_cast( platform::DeviceContextPool::Instance().Get(place)); auto stream = dev_ctx->stream(); - memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src, - gpu_->size(), stream); + paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src, + gpu_->size(), stream); } void ImmutableCPU() const { @@ -284,7 +284,7 @@ class Vector { bool IsInCPU() const { return flag_ & kDataInCPU; } mutable std::vector cpu_; - mutable memory::AllocationPtr gpu_; + mutable paddle::memory::AllocationPtr gpu_; mutable int flag_; mutable std::mutex mtx_; diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 6d39bb3c524..2c1648c81fc 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -23,7 +23,8 @@ limitations under the License. */ #include #include -#include "glog/logging.h" // For VLOG() +#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#include "glog/logging.h" // For VLOG() #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/details/op_registry.h" #include "paddle/fluid/framework/framework.pb.h" diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 8d0d96d391e..f42ee9a697b 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -89,12 +89,21 @@ endif() if(WITH_MKL) include_directories("${PADDLE_LIB}/third_party/install/mklml/include") - set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} - ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) + if(NOT WIN32) + set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} + ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) + else(WIN32) + set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml${CMAKE_SHARED_LIBRARY_SUFFIX} + ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5md${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif(WIN32) set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn") if(EXISTS ${MKLDNN_PATH}) include_directories("${MKLDNN_PATH}/include") - set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) + if(WIN32) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) + else(WIN32) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) + endif(WIN32) endif() else() set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc index 3e8fb83e9d5..307c3488223 100644 --- a/paddle/fluid/memory/detail/system_allocator.cc +++ b/paddle/fluid/memory/detail/system_allocator.cc @@ -17,7 +17,6 @@ limitations under the License. */ #ifdef _WIN32 #include -#include // VirtualLock/VirtualUnlock #else #include // for mlock and munlock #endif diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 257bfc0a3f9..95ad67e33e2 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -44,9 +44,8 @@ endif() register_operators(EXCLUDES warpctc_op conv_fusion_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS}) - # warpctc_op needs cudnn 7 above -if (WITH_GPU AND NOT WIN32) +if (WITH_GPU) if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7) op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc) else() @@ -64,9 +63,7 @@ endif() set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor) -if (NOT WIN32) - set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc) -endif() +set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions) if (WITH_GPU) diff --git a/paddle/fluid/operators/cum_op.h b/paddle/fluid/operators/cum_op.h index 999fdcff907..7c0fda4169b 100644 --- a/paddle/fluid/operators/cum_op.h +++ b/paddle/fluid/operators/cum_op.h @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once + +#include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc b/paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc index c600d1e3d76..bf9aef91350 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc @@ -19,6 +19,9 @@ limitations under the License. */ #include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/operators/math/jit_kernel.h" +#if defined(_WIN32) && defined(_WINSOCKAPI_) +#define _WINSOCK2API_ /* Prevent inclusion of winsock2.h */ +#endif #include "xbyak/xbyak.h" #include "xbyak/xbyak_util.h" diff --git a/paddle/fluid/operators/math/detail/lstm_cpu_kernel.h b/paddle/fluid/operators/math/detail/lstm_cpu_kernel.h index ccbd05c82ad..2e3779ff084 100644 --- a/paddle/fluid/operators/math/detail/lstm_cpu_kernel.h +++ b/paddle/fluid/operators/math/detail/lstm_cpu_kernel.h @@ -17,6 +17,12 @@ limitations under the License. */ #include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/lstm_compute.h" +#if defined(_WIN32) +#if defined(__AVX2__) || defined(__AVX__) +inline __m256 operator+=(__m256 a, __m256 b) { return _mm256_add_ps(a, b); } +#endif +#endif + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/jit_gen.h b/paddle/fluid/operators/math/jit_gen.h index 6abf3434cc8..2bc740e5983 100644 --- a/paddle/fluid/operators/math/jit_gen.h +++ b/paddle/fluid/operators/math/jit_gen.h @@ -18,6 +18,9 @@ limitations under the License. */ #include #include "paddle/fluid/platform/macros.h" +#if defined(_WIN32) && defined(_WINSOCKAPI_) +#define _WINSOCK2API_ /* Prevent inclusion of winsock2.h */ +#endif #define XBYAK_USE_MMAP_ALLOCATOR #include "xbyak/xbyak.h" #include "xbyak/xbyak_util.h" diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index f9a32bfa4c1..1642c178097 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -14,6 +14,10 @@ limitations under the License. */ #include "paddle/fluid/platform/cpu_info.h" +#if defined(_WIN32) +#define NOMINMAX // msvc max/min macro conflict with std::min/max +#endif + #ifdef PADDLE_WITH_XBYAK #include "xbyak/xbyak.h" #include "xbyak/xbyak_util.h" @@ -22,9 +26,8 @@ limitations under the License. */ #ifdef __APPLE__ #include #include - #elif defined(_WIN32) -#define NOMINMAX // msvc max/min macro conflict with std::min/max +#define WIN32_LEAN_AND_MEAN #include #else #include diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index 5939c500c94..07159d4a12e 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -16,9 +16,7 @@ if (CUPTI_FOUND) list(APPEND CUDA_SRCS cupti.cc) endif(CUPTI_FOUND) nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader) -if (NOT WIN32) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) -endif(NOT WIN32) if (WITH_MKLML) cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml) endif() diff --git a/paddle/fluid/platform/dynload/cudnn.cc b/paddle/fluid/platform/dynload/cudnn.cc index f3cd3b2bbed..91d9a1ef013 100644 --- a/paddle/fluid/platform/dynload/cudnn.cc +++ b/paddle/fluid/platform/dynload/cudnn.cc @@ -38,6 +38,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP); #endif +#ifdef CUDNN_DNN_ROUTINE_EACH_R6 +CUDNN_DNN_ROUTINE_EACH_R6(DEFINE_WRAP); +#endif + #ifdef CUDNN_DNN_ROUTINE_EACH_R7 CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP); #endif diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index 550fe2edee1..2f4f8101e4b 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name); #define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \ struct DynLoad__##__name { \ template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ + auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \ using cudnn_func = decltype(&::__name); \ std::call_once(cudnn_dso_flag, []() { \ cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \ diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index cc5cda6106c..15d51683665 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -53,6 +53,12 @@ namespace platform { namespace dynload { static constexpr char cupti_lib_path[] = CUPTI_LIB_PATH; +#if defined(_WIN32) && defined(PADDLE_WITH_CUDA) +static constexpr char* win_cublas_lib = "cublas64_" PADDLE_CUDA_BINVER ".dll"; +static constexpr char* win_curand_lib = "curand64_" PADDLE_CUDA_BINVER ".dll"; +static constexpr char* win_cudnn_lib = "cudnn64_" PADDLE_CUDNN_BINVER ".dll"; +#endif + static inline std::string join(const std::string& part1, const std::string& part2) { // directory separator @@ -165,6 +171,8 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root, void* GetCublasDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib"); +#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, win_cublas_lib); #else return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so"); #endif @@ -173,6 +181,8 @@ void* GetCublasDsoHandle() { void* GetCUDNNDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", false); +#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) + return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, win_cudnn_lib); #else return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", false); #endif @@ -193,6 +203,8 @@ void* GetCUPTIDsoHandle() { void* GetCurandDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib"); +#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) + return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, win_curand_lib); #else return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so"); #endif @@ -201,6 +213,8 @@ void* GetCurandDsoHandle() { void* GetWarpCTCDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.dylib"); +#elif defined(_WIN32) + return GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "warpctc.dll"); #else return GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so"); #endif @@ -225,6 +239,8 @@ void* GetTensorRtDsoHandle() { void* GetMKLMLDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "libmklml_intel.dylib"); +#elif defined(_WIN32) + return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "mklml.dll"); #else return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "libmklml_intel.so"); #endif diff --git a/paddle/fluid/platform/dynload/dynamic_loader.h b/paddle/fluid/platform/dynload/dynamic_loader.h index 84fd2ce9987..edb4c649add 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.h +++ b/paddle/fluid/platform/dynload/dynamic_loader.h @@ -18,6 +18,12 @@ namespace paddle { namespace platform { namespace dynload { +#ifndef _WIN32 +#define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__)) +#else +#define DECLARE_TYPE(__name, ...) decltype(auto) +#endif + void* GetCublasDsoHandle(); void* GetCUDNNDsoHandle(); void* GetCUPTIDsoHandle(); diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h index f0a97366236..944b00bae10 100644 --- a/paddle/fluid/platform/dynload/mklml.h +++ b/paddle/fluid/platform/dynload/mklml.h @@ -34,7 +34,7 @@ extern void* mklml_dso_handle; #define DYNAMIC_LOAD_MKLML_WRAP(__name) \ struct DynLoad__##__name { \ template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ + auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \ using mklmlFunc = decltype(&::__name); \ std::call_once(mklml_dso_flag, []() { \ mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \ diff --git a/paddle/fluid/platform/dynload/tensorrt.h b/paddle/fluid/platform/dynload/tensorrt.h index 5d67658b94a..751aa54b1ad 100644 --- a/paddle/fluid/platform/dynload/tensorrt.h +++ b/paddle/fluid/platform/dynload/tensorrt.h @@ -33,7 +33,7 @@ extern void* tensorrt_dso_handle; #define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name) \ struct DynLoad__##__name { \ template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ + auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \ using tensorrt_func = decltype(__name(args...)) (*)(Args...); \ std::call_once(tensorrt_dso_flag, []() { \ tensorrt_dso_handle = \ diff --git a/paddle/fluid/platform/dynload/warpctc.h b/paddle/fluid/platform/dynload/warpctc.h index 18ed9956f18..bc1977b05de 100644 --- a/paddle/fluid/platform/dynload/warpctc.h +++ b/paddle/fluid/platform/dynload/warpctc.h @@ -34,7 +34,7 @@ extern void* warpctc_dso_handle; #define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \ struct DynLoad__##__name { \ template \ - auto operator()(Args... args) -> decltype(__name(args...)) { \ + auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \ using warpctcFunc = decltype(&::__name); \ std::call_once(warpctc_dso_flag, []() { \ warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \ diff --git a/paddle/fluid/platform/port.h b/paddle/fluid/platform/port.h index ad070171df3..41388d8959e 100644 --- a/paddle/fluid/platform/port.h +++ b/paddle/fluid/platform/port.h @@ -37,6 +37,10 @@ #define GOOGLE_GLOG_DLL_DECL #include // _popen, _pclose #include +#ifdef _WINSOCKAPI_ +/* Prevent inclusion of winsock.h in windows.h */ +#define WIN32_LEAN_AND_MEAN +#endif #include #include // std::accumulate in msvc #ifndef S_ISDIR // windows port for sys/stat.h @@ -55,7 +59,6 @@ static void *dlsym(void *handle, const char *symbol_name) { static void *dlopen(const char *filename, int flag) { std::string file_name(filename); - file_name.replace(0, file_name.size() - 1, '/', '\\'); HMODULE hModule = LoadLibrary(file_name.c_str()); if (!hModule) { throw std::runtime_error(file_name + " not found."); diff --git a/paddle/fluid/train/demo/CMakeLists.txt b/paddle/fluid/train/demo/CMakeLists.txt index eabb51d370a..af033fa7407 100644 --- a/paddle/fluid/train/demo/CMakeLists.txt +++ b/paddle/fluid/train/demo/CMakeLists.txt @@ -35,16 +35,26 @@ add_executable(demo_trainer demo_trainer.cc) if(WITH_MKLDNN) include_directories("${PADDLE_LIB}/third_party/install/mkldnn/include") - set(MKLDNN_LIB ${PADDLE_LIB}/third_party/install/mkldnn/lib/libmkldnn.so.0) -endif() + if(WIN32) + set(MKLDNN_LIB ${PADDLE_LIB}/third_party/install/mkldnn/lib/mkldnn.lib) + else(WIN32) + set(MKLDNN_LIB ${PADDLE_LIB}/third_party/install/mkldnn/lib/libmkldnn.so.0) + endif(WIN32) +endif(WITH_MKLDNN) if(WITH_MKL) include_directories("${PADDLE_LIB}/third_party/install/mklml/include") - set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel.so) + if(WIN32) + set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/mklml.lib) + else(WIN32) + set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel.so) + endif(WIN32) else() if(APPLE) set(MATH_LIB cblas) - else(APPLE) + elseif(WIN32) + set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.lib) + else() set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.a) endif(APPLE) endif() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 139176b0d6c..078d543ba2d 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -48,12 +48,18 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in IF(WIN32) # Python would use the .pyd by default under Windows series platform set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/) - get_filename_component(openblas_refpath ${CBLAS_LIBRARIES} DIRECTORY) set(FLUID_CORE ${FLUID_DST_DIR}/core.pyd) - add_custom_command(OUTPUT ${FLUID_CORE} - COMMAND cmake -E copy $ ${FLUID_CORE} - COMMAND cmake -E copy ${openblas_refpath}/openblas.dll ${FLUID_DST_DIR} - DEPENDS paddle_pybind) + if(NOT WITH_MKLDNN) + get_filename_component(openblas_refpath ${CBLAS_LIBRARIES} DIRECTORY) + add_custom_command(OUTPUT ${FLUID_CORE} + COMMAND cmake -E copy $ ${FLUID_CORE} + COMMAND cmake -E copy ${openblas_refpath}/openblas.dll ${FLUID_DST_DIR} + DEPENDS paddle_pybind) + else(NOT WITH_MKLDNN) + add_custom_command(OUTPUT ${FLUID_CORE} + COMMAND cmake -E copy $ ${FLUID_CORE} + DEPENDS paddle_pybind) + endif(NOT WITH_MKLDNN) ELSE() set(FLUID_CORE ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so) add_custom_command(OUTPUT ${FLUID_CORE} diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 2dea71d7af9..fd788d09296 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -102,6 +102,12 @@ def __bootstrap__(): import sys import os import platform + + if os.name == 'nt': + third_lib_path = os.path.abspath(os.path.dirname(__file__)) + os.sep + '..' + os.sep + 'libs' + os.environ['path'] += ';' + third_lib_path + sys.path.append(third_lib_path) + from . import core in_test = 'unittest' in sys.modules @@ -128,13 +134,12 @@ def __bootstrap__(): 'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size", 'eager_delete_tensor_gb', 'fast_eager_deletion_mode', 'allocator_strategy', 'reader_queue_speed_test_mode', - 'print_sub_graph_dir', 'pe_profile_fname' + 'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir' ] if 'Darwin' not in sysstr: read_env_flags.append('use_pinned_memory') if os.name != 'nt': - read_env_flags.append('warpctc_dir') read_env_flags.append('cpu_deterministic') if core.is_compiled_with_dist(): diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 08979205946..da74fd41fcc 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -16,6 +16,7 @@ from __future__ import print_function import collections import contextlib +import os import re import six import sys @@ -27,11 +28,18 @@ from .proto import framework_pb2 try: from . import core except ImportError as e: - raise ImportError( - """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\" - if you encounters \"libmkldnn.so not found\" errors. If you have python - installed in other directory, replace \"/usr/local/lib\" with your own - directory. The original error is: \n""" + cpt.get_exception_message(e)) + if os.name == 'nt': + raise ImportError( + """NOTE: You may need to run \"set PATH=c:\python27\lib:%PATH%\" + if you encounters \"mkldnn.dll not found\" errors. If you have python + installed in other directory, replace \"c:\python27\lib" with your own + directory. The original error is: \n""" + cpt.get_exception_message(e)) + else: + raise ImportError( + """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\" + if you encounters \"libmkldnn.so not found\" errors. If you have python + installed in other directory, replace \"/usr/local/lib\" with your own + directory. The original error is: \n""" + cpt.get_exception_message(e)) except Exception as e: raise e from . import unique_name diff --git a/python/setup.py.in b/python/setup.py.in index 65620466412..f4613dd72de 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -158,27 +158,29 @@ if '${WITH_FLUID_ONLY}'== 'OFF': # put all thirdparty libraries in paddle.libs libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs' -if os.name != 'nt': - package_data['paddle.libs']= [] - package_data['paddle.libs']=['libwarpctc' + ext_name] - shutil.copy('${WARPCTC_LIBRARIES}', libs_path) + +package_data['paddle.libs']= [] +package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name] +shutil.copy('${WARPCTC_LIBRARIES}', libs_path) + if '${WITH_MKL}' == 'ON': - shutil.copy('${MKLML_LIB}', libs_path) - shutil.copy('${MKLML_IOMP_LIB}', libs_path) - package_data['paddle.libs']+=['libmklml_intel' + ext_name,'libiomp5' + ext_name] + shutil.copy('${MKLML_SHARED_LIB}', libs_path) + shutil.copy('${MKLML_SHARED_IOMP_LIB}', libs_path) + package_data['paddle.libs']+=[('libmklml_intel' if os.name != 'nt' else 'mklml') + ext_name, ('libiomp5' if os.name != 'nt' else 'libiomp5md') + ext_name] if '${WITH_MKLDNN}' == 'ON': if '${CMAKE_BUILD_TYPE}' == 'Release': - # only change rpath in Release mode. - # TODO(typhoonzero): use install_name_tool to patch mkl libs once - # we can support mkl on mac. - # - # change rpath of libmkldnn.so.0, add $ORIGIN/ to it. - # The reason is that all thirdparty libraries in the same directory, - # thus, libmkldnn.so.0 will find libmklml_intel.so and libiomp5.so. - command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}" - if os.system(command) != 0: - raise Exception("patch libmkldnn.so failed, command: %s" % command) - package_data['paddle.libs']+=['libmkldnn.so.0'] + if os.name != 'nt': + # only change rpath in Release mode. + # TODO(typhoonzero): use install_name_tool to patch mkl libs once + # we can support mkl on mac. + # + # change rpath of libmkldnn.so.0, add $ORIGIN/ to it. + # The reason is that all thirdparty libraries in the same directory, + # thus, libmkldnn.so.0 will find libmklml_intel.so and libiomp5.so. + command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}" + if os.system(command) != 0: + raise Exception("patch libmkldnn.so failed, command: %s" % command) + package_data['paddle.libs']+=['libmkldnn.so.0' if os.name != 'nt' else ('mkldnn' + ext_name)] shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) if '${WITH_NGRAPH}' == 'ON': # only change rpath in Release mode, -- GitLab