From 00463fdfe39e26fa9fd9897d6c12499476691232 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 20 Aug 2018 15:50:17 +0800 Subject: [PATCH] cudnn windows support (#12757) * cudnn widndows * "add comment" * "windows support" * "fix cmake error" --- CMakeLists.txt | 2 +- cmake/configure.cmake | 4 ---- cmake/cudnn.cmake | 21 ++++++++++++++++++-- cmake/flags.cmake | 5 +++++ paddle/fluid/framework/CMakeLists.txt | 3 ++- paddle/fluid/operators/CMakeLists.txt | 19 ++++++++++++++---- paddle/fluid/operators/nccl/CMakeLists.txt | 2 +- paddle/fluid/platform/dynload/CMakeLists.txt | 2 +- paddle/fluid/platform/enforce.h | 6 +++--- 9 files changed, 47 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 684477271..ba9af2cc7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -212,6 +212,7 @@ elseif() set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in GPU only now." FORCE) endif() +include(flags) # set paddle compile flags include(cudnn) # set cudnn libraries, must before configure include(cupti) include(configure) # add paddle env configuration @@ -220,7 +221,6 @@ include(package) # set paddle packages include(ccache) # set ccache for compilation include(util) # set unittest and link libs include(rdma) # set rdma libraries -include(flags) # set paddle compile flags include(version) # set PADDLE_VERSION include(coveralls) # set code coverage include(inference_lib) # add paddle fluid inference libraries diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 53454d79f..6d8e68df9 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -56,10 +56,6 @@ if(NOT CMAKE_CROSSCOMPILING) set(SIMD_FLAG ${SSE3_FLAG}) endif() endif() -if(UNIX AND NOT APPLE) - # except apple from nix*Os family - set(LINUX TRUE) -endif(UNIX AND NOT APPLE) if(NOT WITH_GOLANG) add_definitions(-DPADDLE_WITHOUT_GOLANG) diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index 9eebea816..cd5153392 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -25,8 +25,25 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/lib64 $ENV{CUDNN_ROOT}/lib - /usr/lib) -find_library(CUDNN_LIBRARY NAMES libcudnn.so libcudnn.dylib # libcudnn_static.a + /usr/lib + ${CUDA_TOOLKIT_ROOT_DIR} + ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 + ) +set(CUDNN_LIB_NAME "") +if (LINUX) +set(CUDNN_LIB_NAME "libcudnn.so") +endif(LINUX) + +if(WIN32) +# only support cudnn7 +set(CUDNN_LIB_NAME "cudnn.lib" "cudnn64_7.dll") +endif(WIN32) + +if(Apple) +set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so") +endif(Apple) + +find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist} NO_DEFAULT_PATH DOC "Path to cuDNN library.") diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 1120677a3..8ac157c4d 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -142,6 +142,11 @@ else() ${GPU_COMMON_FLAGS}) endif() +if(UNIX AND NOT APPLE) + # except apple from nix*Os family + set(LINUX TRUE) +endif(UNIX AND NOT APPLE) + foreach(flag ${COMMON_FLAGS}) safe_set_cflag(CMAKE_C_FLAGS ${flag}) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 8e69f8f0b..2ec422cc1 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -99,12 +99,13 @@ else() cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method) endif() - +if (NOT WIN32) cc_library(parallel_executor SRCS parallel_executor.cc DEPS threaded_ssa_graph_executor scope_buffered_ssa_graph_executor graph graph_viz_pass multi_devices_graph_pass multi_devices_graph_print_pass multi_devices_graph_check_pass fast_threaded_ssa_graph_executor) +endif() # NOT WIN32 cc_library(prune SRCS prune.cc DEPS framework_proto) cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context) diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index e29fe2a42..ed8e9ed77 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -84,6 +84,15 @@ function(op_library TARGET) message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file") endif() + #remove windows unsupported op + if (WIN32) + foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op") + if ("${TARGET}" STREQUAL "${windows_unsupport_op}") + return() + endif() + endforeach() + endif(WIN32) + list(LENGTH op_library_DEPS op_library_DEPS_len) if (${op_library_DEPS_len} GREATER 0) set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE) @@ -181,19 +190,19 @@ function(op_library TARGET) endfunction() add_subdirectory(math) +if (NOT WIN32) add_subdirectory(nccl) - if(WITH_GPU) op_library(nccl_op DEPS nccl_common) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(ncclAllReduce);\n") else() set(DEPS_OPS ${DEPS_OPS} nccl_op) endif() +endif() # NOT WIN32 set(DISTRIBUTE_DEPS "") if(WITH_DISTRIBUTE) add_subdirectory(distributed) - set(DISTRIBUTE_DEPS "") if(WITH_GRPC) set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf node) @@ -222,7 +231,7 @@ if(WITH_DISTRIBUTE) #set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) #cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op # listen_and_serv_op sum_op executor SERIAL) - if(WITH_GPU) + if(WITH_GPU AND NOT WIN32) set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS listen_and_serv_op ${DISTRIBUTE_DEPS} executor SERIAL) if(WITH_GRPC) @@ -233,7 +242,7 @@ if(WITH_DISTRIBUTE) set_source_files_properties(gen_nccl_id_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) else() set(DEPS_OPS ${DEPS_OPS} gen_nccl_id_op) - endif() + endif() # WITH_GPU AND NOT WIN32 else() set(DEPS_OPS ${DEPS_OPS} checkpoint_notify_op prefetch_op recv_op listen_and_serv_op send_op send_barrier_op fetch_barrier_op gen_nccl_id_op) endif() @@ -331,5 +340,7 @@ cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_sea cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory) cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op) +if(NOT WIN32) nv_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) +endif() nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor) diff --git a/paddle/fluid/operators/nccl/CMakeLists.txt b/paddle/fluid/operators/nccl/CMakeLists.txt index ce0ddd89b..cdcba8035 100644 --- a/paddle/fluid/operators/nccl/CMakeLists.txt +++ b/paddle/fluid/operators/nccl/CMakeLists.txt @@ -1,3 +1,3 @@ -if(WITH_GPU) +if(WITH_GPU AND NOT WIN32) nv_library(nccl_common SRCS nccl_gpu_common.cc DEPS device_context operator ) endif() diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index 9da787a40..07159d4a1 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -3,7 +3,7 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc) # There is no macOS version of NCCL. -if (NOT APPLE) +if (NOT APPLE AND NOT WIN32) list(APPEND CUDA_SRCS nccl.cc) endif() diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 81b5359b4..6c2331b75 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -44,7 +44,7 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/dynload/curand.h" -#ifndef __APPLE__ +#if !defined(__APPLE__) and !defined(_WIN32) #include "paddle/fluid/platform/dynload/nccl.h" #endif // __APPLE__ #endif // PADDLE_WITH_CUDA @@ -205,7 +205,7 @@ inline typename std::enable_if::type throw_on_error( #endif } -#ifndef __APPLE__ +#if !defined(__APPLE__) and !defined(_WIN32) template inline typename std::enable_if::type throw_on_error( ncclResult_t stat, const Args&... args) { @@ -221,7 +221,7 @@ inline typename std::enable_if::type throw_on_error( #endif } } -#endif // __APPLE__ +#endif // __APPLE__ and windows #endif // PADDLE_WITH_CUDA template -- GitLab