未验证 提交 87f4311a 编写于 作者: Y Yang Yang(Tony) 提交者: GitHub

compile with nccl2 (#8411)

* compile with nccl2

* add ncclGroupStart/ncclGroupEnd around the per-device ncclAllReduce calls; this grouping is necessary in nccl2

* add back libnccl-dev
上级 dafc7e36
...@@ -142,7 +142,6 @@ include(external/boost) # download boost ...@@ -142,7 +142,6 @@ include(external/boost) # download boost
include(external/any) # download libn::any include(external/any) # download libn::any
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11 include(external/pybind11) # download pybind11
include(external/nccl)
include(external/cares) include(external/cares)
include(external/grpc) include(external/grpc)
......
if(WITH_GPU) if(WITH_GPU)
cc_library(enforce SRCS enforce.cc DEPS nccl) cc_library(enforce SRCS enforce.cc DEPS)
else() else()
cc_library(enforce SRCS enforce.cc) cc_library(enforce SRCS enforce.cc)
endif() endif()
......
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce)
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc
DEPS dynamic_loader nccl) DEPS dynamic_loader)
cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
...@@ -89,6 +89,7 @@ TEST(NCCL, all_reduce) { ...@@ -89,6 +89,7 @@ TEST(NCCL, all_reduce) {
VLOG(1) << "Invoking ncclAllReduce"; VLOG(1) << "Invoking ncclAllReduce";
dynload::ncclGroupStart();
for (int i = 0; i < dev_count; ++i) { for (int i = 0; i < dev_count; ++i) {
VLOG(1) << "Invoking ncclAllReduce with device " << i; VLOG(1) << "Invoking ncclAllReduce with device " << i;
SetDeviceId(i); SetDeviceId(i);
...@@ -97,6 +98,7 @@ TEST(NCCL, all_reduce) { ...@@ -97,6 +98,7 @@ TEST(NCCL, all_reduce) {
ncclSum, comms[i], data[i]->dev_ctx.stream())); ncclSum, comms[i], data[i]->dev_ctx.stream()));
VLOG(1) << "Invoked ncclAllReduce for device " << i; VLOG(1) << "Invoked ncclAllReduce for device " << i;
} }
dynload::ncclGroupEnd();
VLOG(1) << "Invoked ncclAllReduce"; VLOG(1) << "Invoked ncclAllReduce";
......
...@@ -34,6 +34,7 @@ function cmake_gen() { ...@@ -34,6 +34,7 @@ function cmake_gen() {
Configuring cmake in /paddle/build ... Configuring cmake in /paddle/build ...
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
${PYTHON_FLAGS} ${PYTHON_FLAGS}
-DWITH_DSO=ON
-DWITH_DOC=OFF -DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF} -DWITH_GPU=${WITH_GPU:-OFF}
-DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF}
...@@ -57,6 +58,7 @@ EOF ...@@ -57,6 +58,7 @@ EOF
cmake .. \ cmake .. \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} \
${PYTHON_FLAGS} \ ${PYTHON_FLAGS} \
-DWITH_DSO=ON \
-DWITH_DOC=OFF \ -DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \ -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \
...@@ -187,6 +189,7 @@ EOF ...@@ -187,6 +189,7 @@ EOF
ldconfig ldconfig
${DOCKERFILE_CUDNN_DSO} ${DOCKERFILE_CUDNN_DSO}
${DOCKERFILE_GPU_ENV} ${DOCKERFILE_GPU_ENV}
ENV NCCL_LAUNCH_MODE PARALLEL
ADD go/cmd/pserver/pserver /usr/bin/ ADD go/cmd/pserver/pserver /usr/bin/
ADD go/cmd/master/master /usr/bin/ ADD go/cmd/master/master /usr/bin/
# default command shows the paddle version and exit # default command shows the paddle version and exit
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册