diff --git a/CMakeLists.txt b/CMakeLists.txt index fb91e3b369c58a8b0287a8bed63245bd1a0ba4d3..5db5c228be2d6491463ec1ddb17de7bec730bd44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,7 +142,6 @@ include(external/boost) # download boost include(external/any) # download libn::any include(external/eigen) # download eigen3 include(external/pybind11) # download pybind11 -include(external/nccl) include(external/cares) include(external/grpc) diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 32e768fdf44586e64750be7fa53a2574f03fe0e3..0d0cee21d14f29c03ebabcb921ecc4f29f352b55 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -1,5 +1,5 @@ if(WITH_GPU) - cc_library(enforce SRCS enforce.cc DEPS nccl) + cc_library(enforce SRCS enforce.cc DEPS) else() cc_library(enforce SRCS enforce.cc) endif() diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index cf2081b434961c17c1b65509909699788d2b9ad9..264b4ebf2c06d9e688a32a223dff3ec079333fd9 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -1,4 +1,4 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc - DEPS dynamic_loader nccl) + DEPS dynamic_loader) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) diff --git a/paddle/fluid/platform/nccl_test.cu b/paddle/fluid/platform/nccl_test.cu index 7123035363343ce68b708acb68f2a05ece81d912..212ea8517e897f86a3c19bb5d996c567854811a6 100644 --- a/paddle/fluid/platform/nccl_test.cu +++ b/paddle/fluid/platform/nccl_test.cu @@ -89,6 +89,7 @@ TEST(NCCL, all_reduce) { VLOG(1) << "Invoking ncclAllReduce"; + dynload::ncclGroupStart(); for (int i = 0; i < dev_count; ++i) { VLOG(1) << "Invoking ncclAllReduce with device " << i; SetDeviceId(i); @@ -97,6 +98,7 @@ TEST(NCCL, all_reduce) { ncclSum, comms[i], data[i]->dev_ctx.stream())); VLOG(1) << "Invoked ncclAllReduce for device " << i; } + dynload::ncclGroupEnd(); VLOG(1) << "Invoked ncclAllReduce"; diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 442a7ea883052e73a5d50d5558f57732be93fb3a..56fa138786104df3b67cd5248d1625509cc913d1 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -34,6 +34,7 @@ function cmake_gen() { Configuring cmake in /paddle/build ... -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} ${PYTHON_FLAGS} + -DWITH_DSO=ON -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} @@ -57,6 +58,7 @@ EOF cmake .. \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} \ ${PYTHON_FLAGS} \ + -DWITH_DSO=ON \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \ @@ -171,7 +173,7 @@ EOF if [[ ${WITH_GPU} == "ON" ]]; then NCCL_DEPS="apt-get install -y libnccl-dev &&" else - NCCL_DEPS="" + NCCL_DEPS="" fi cat >> /paddle/build/Dockerfile <