未验证 提交 00463fdf 编写于 作者: D dzhwinter 提交者: GitHub

cudnn windows support (#12757)

* cudnn widndows

* "add comment"

* "windows support"

* "fix cmake error"
上级 c62f68cb
......@@ -212,6 +212,7 @@ elseif()
set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in GPU only now." FORCE)
endif()
include(flags) # set paddle compile flags
include(cudnn) # set cudnn libraries, must before configure
include(cupti)
include(configure) # add paddle env configuration
......@@ -220,7 +221,6 @@ include(package) # set paddle packages
include(ccache) # set ccache for compilation
include(util) # set unittest and link libs
include(rdma) # set rdma libraries
include(flags) # set paddle compile flags
include(version) # set PADDLE_VERSION
include(coveralls) # set code coverage
include(inference_lib) # add paddle fluid inference libraries
......
......@@ -56,10 +56,6 @@ if(NOT CMAKE_CROSSCOMPILING)
set(SIMD_FLAG ${SSE3_FLAG})
endif()
endif()
if(UNIX AND NOT APPLE)
# except apple from nix*Os family
set(LINUX TRUE)
endif(UNIX AND NOT APPLE)
if(NOT WITH_GOLANG)
add_definitions(-DPADDLE_WITHOUT_GOLANG)
......
......@@ -25,8 +25,25 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
$ENV{CUDNN_ROOT}
$ENV{CUDNN_ROOT}/lib64
$ENV{CUDNN_ROOT}/lib
/usr/lib)
find_library(CUDNN_LIBRARY NAMES libcudnn.so libcudnn.dylib # libcudnn_static.a
/usr/lib
${CUDA_TOOLKIT_ROOT_DIR}
${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
)
set(CUDNN_LIB_NAME "")
if (LINUX)
set(CUDNN_LIB_NAME "libcudnn.so")
endif(LINUX)
if(WIN32)
# only support cudnn7
set(CUDNN_LIB_NAME "cudnn.lib" "cudnn64_7.dll")
endif(WIN32)
if(Apple)
set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so")
endif(Apple)
find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a
PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist}
NO_DEFAULT_PATH
DOC "Path to cuDNN library.")
......
......@@ -142,6 +142,11 @@ else()
${GPU_COMMON_FLAGS})
endif()
if(UNIX AND NOT APPLE)
# except apple from nix*Os family
set(LINUX TRUE)
endif(UNIX AND NOT APPLE)
foreach(flag ${COMMON_FLAGS})
safe_set_cflag(CMAKE_C_FLAGS ${flag})
......
......@@ -99,12 +99,13 @@ else()
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method)
endif()
if (NOT WIN32)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
graph graph_viz_pass multi_devices_graph_pass
multi_devices_graph_print_pass multi_devices_graph_check_pass
fast_threaded_ssa_graph_executor)
endif() # NOT WIN32
cc_library(prune SRCS prune.cc DEPS framework_proto)
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
......
......@@ -84,6 +84,15 @@ function(op_library TARGET)
message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file")
endif()
#remove windows unsupported op
if (WIN32)
foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op")
if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
return()
endif()
endforeach()
endif(WIN32)
list(LENGTH op_library_DEPS op_library_DEPS_len)
if (${op_library_DEPS_len} GREATER 0)
set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE)
......@@ -181,19 +190,19 @@ function(op_library TARGET)
endfunction()
add_subdirectory(math)
if (NOT WIN32)
add_subdirectory(nccl)
if(WITH_GPU)
op_library(nccl_op DEPS nccl_common)
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(ncclAllReduce);\n")
else()
set(DEPS_OPS ${DEPS_OPS} nccl_op)
endif()
endif() # NOT WIN32
set(DISTRIBUTE_DEPS "")
if(WITH_DISTRIBUTE)
add_subdirectory(distributed)
set(DISTRIBUTE_DEPS "")
if(WITH_GRPC)
set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf node)
......@@ -222,7 +231,7 @@ if(WITH_DISTRIBUTE)
#set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
#cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op
# listen_and_serv_op sum_op executor SERIAL)
if(WITH_GPU)
if(WITH_GPU AND NOT WIN32)
set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS listen_and_serv_op ${DISTRIBUTE_DEPS} executor SERIAL)
if(WITH_GRPC)
......@@ -233,7 +242,7 @@ if(WITH_DISTRIBUTE)
set_source_files_properties(gen_nccl_id_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
else()
set(DEPS_OPS ${DEPS_OPS} gen_nccl_id_op)
endif()
endif() # WITH_GPU AND NOT WIN32
else()
set(DEPS_OPS ${DEPS_OPS} checkpoint_notify_op prefetch_op recv_op listen_and_serv_op send_op send_barrier_op fetch_barrier_op gen_nccl_id_op)
endif()
......@@ -331,5 +340,7 @@ cc_test(beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_sea
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory)
cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)
cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op)
if(NOT WIN32)
nv_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context)
endif()
nv_test(dropout_op_test SRCS dropout_op_test.cc DEPS dropout_op tensor)
if(WITH_GPU)
if(WITH_GPU AND NOT WIN32)
nv_library(nccl_common SRCS nccl_gpu_common.cc DEPS device_context operator )
endif()
......@@ -3,7 +3,7 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc)
# There is no macOS version of NCCL.
if (NOT APPLE)
if (NOT APPLE AND NOT WIN32)
list(APPEND CUDA_SRCS nccl.cc)
endif()
......
......@@ -44,7 +44,7 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/curand.h"
#ifndef __APPLE__
#if !defined(__APPLE__) and !defined(_WIN32)
#include "paddle/fluid/platform/dynload/nccl.h"
#endif // __APPLE__
#endif // PADDLE_WITH_CUDA
......@@ -205,7 +205,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
#endif
}
#ifndef __APPLE__
#if !defined(__APPLE__) and !defined(_WIN32)
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
ncclResult_t stat, const Args&... args) {
......@@ -221,7 +221,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
#endif
}
}
#endif // __APPLE__
#endif // __APPLE__ and windows
#endif // PADDLE_WITH_CUDA
template <typename T>
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册