Add optional brpc RDMA support (WITH_BRPC_RDMA).

Summary of what the hunks below do:
  * CMakeLists.txt: declare the WITH_BRPC_RDMA option, log the chosen RPC
    framework, and reject RDMA together with grpc or without WITH_DISTRIBUTE.
  * cmake/configure.cmake: define PADDLE_WITH_BRPC_RDMA when the option is on.
  * cmake/external/brpc.cmake: import the system ssl/crypto libraries (failing
    at configure time when they are missing), switch the brpc repository/tag,
    add zlib to the prefix path and forward the RDMA switch to brpc's build.
  * paddle/fluid/framework/executor.cc: pick the RPC client type through
    RPCCLIENT_T from operators/detail/macros.h instead of naming GRPCClient.
  * paddle/fluid/operators/CMakeLists.txt: link ibverbs/rdmacm for RDMA builds
    and export the dependency list as GLOB_DISTRIBUTE_DEPS.
  * paddle/fluid/operators/detail/macros.h: guard on PADDLE_WITH_DISTRIBUTE and
    fully qualify RPCSERVER_T / RPCCLIENT_T.

Note: whitespace inside context lines was re-created when this patch was
re-flowed; if a hunk is rejected, retry with `git apply --ignore-whitespace`.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b216429272ac72542009578383a01aea0594ae10..c23be24c6c8971746f0e7fce7209da3dd3b667ec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,7 @@ option(WITH_CONTRIB "Compile the third-party contributation" OFF)
 option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF)
 option(WITH_ANAKIN "Compile with Anakin library" OFF)
 option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
+option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocol" OFF)
 
 # CMAKE_BUILD_TYPE
 if(NOT CMAKE_BUILD_TYPE)
@@ -158,12 +159,24 @@ include(external/cares)
 if(WITH_DISTRIBUTE)
     if(WITH_GRPC)
         include(external/grpc)
+        message(STATUS "Use grpc framework.")
     else()
+        message(STATUS "Use brpc framework.")
         include(external/leveldb)
         include(external/brpc)
     endif()
 endif()
 
+if(WITH_BRPC_RDMA)
+    message(STATUS "Use brpc with rdma.")
+    if(WITH_GRPC)
+        message(FATAL_ERROR "Can't use grpc with brpc rdma.")
+    endif()
+    if(NOT WITH_DISTRIBUTE)
+        message(FATAL_ERROR "Can't use brpc rdma in no distribute env.")
+    endif()
+endif()
+
 include(external/snappy) # download snappy
 include(external/snappystream)
 include(external/threadpool)
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index 6a8b15a6b60a2e5635dc78fc877f0c8da9a2a998..e4af34d10ed92c501dd805addb62747c91c00978 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -174,3 +174,7 @@ endif(WITH_GOLANG)
 if(WITH_GRPC)
     add_definitions(-DPADDLE_WITH_GRPC)
 endif(WITH_GRPC)
+
+if(WITH_BRPC_RDMA)
+    add_definitions(-DPADDLE_WITH_BRPC_RDMA)
+endif(WITH_BRPC_RDMA)
diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake
index 8e2c913b2caae0c4eeb844d2b51a8975e81c1592..30b227b6452abf44171a1a4e04569e66b16e67a4 100644
--- a/cmake/external/brpc.cmake
+++ b/cmake/external/brpc.cmake
@@ -14,6 +14,23 @@
 
 INCLUDE(ExternalProject)
 
+# ssl and crypto are located on the host and wrapped as imported targets so
+# they can be listed next to brpc in dependency lists; fail early if missing.
+find_library(SSL_LIBRARY NAMES ssl)
+if(NOT SSL_LIBRARY)
+    message(FATAL_ERROR "ssl library not found; it is required to build brpc.")
+endif()
+ADD_LIBRARY(ssl SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET ssl PROPERTY IMPORTED_LOCATION "${SSL_LIBRARY}")
+
+find_library(CRYPTO_LIBRARY NAMES crypto)
+if(NOT CRYPTO_LIBRARY)
+    message(FATAL_ERROR "crypto library not found; it is required to build brpc.")
+endif()
+ADD_LIBRARY(crypto SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET crypto PROPERTY IMPORTED_LOCATION "${CRYPTO_LIBRARY}")
+
+
 SET(BRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/brpc)
 SET(BRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/brpc)
 SET(BRPC_INCLUDE_DIR "${BRPC_INSTALL_DIR}/include" CACHE PATH "brpc include directory." FORCE)
@@ -22,14 +39,14 @@ SET(BRPC_LIBRARIES "${BRPC_INSTALL_DIR}/lib/libbrpc.a" CACHE FILEPATH "brpc libr
 INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR})
 
 # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args
-set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf")
+set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib")
 
 # If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF
 ExternalProject_Add(
     extern_brpc
     ${EXTERNAL_PROJECT_LOG_ARGS}
-    GIT_REPOSITORY "https://github.com/brpc/brpc"
-    GIT_TAG "6d153dd7ff00f960ae6895c9c5fff0ce9f07aff2"
+    GIT_REPOSITORY "https://github.com/gongweibao/brpc"
+    GIT_TAG "7dc04defad1fd4173aae170c3fcbde131b65155a"
     PREFIX ${BRPC_SOURCES_DIR}
     UPDATE_COMMAND ""
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -42,6 +59,8 @@ ExternalProject_Add(
                -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
                -DCMAKE_PREFIX_PATH=${prefix_path}
                -DBRPC_WITH_GLOG=ON
+               -DIOBUF_WITH_HUGE_BLOCK=ON
+               -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA}
                ${EXTERNAL_OPTIONAL_ARGS}
     LIST_SEPARATOR |
     CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR}
@@ -49,7 +68,7 @@ ExternalProject_Add(
                      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
                      -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
 )
-ADD_DEPENDENCIES(extern_brpc protobuf leveldb gflags glog gtest snappy)
+ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog gtest snappy)
 ADD_LIBRARY(brpc STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES})
 ADD_DEPENDENCIES(brpc extern_brpc)
diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index ae98fccc9600a2a75f12fa516c982bec0ef13f9f..261e9c5a8c0f905e2d4492839d6e88ba93ff1988 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -20,9 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
-#ifdef PADDLE_WITH_DISTRIBUTE
-#include "paddle/fluid/operators/distributed/grpc_client.h"
-#endif
+#include "paddle/fluid/operators/detail/macros.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
 
@@ -49,8 +47,7 @@ Executor::Executor(const platform::Place& place) : place_(place) {}
 
 #ifdef PADDLE_WITH_DISTRIBUTE
 void Executor::Complete() {
-  ::paddle::operators::distributed::RPCClient::GetInstance<
-      ::paddle::operators::distributed::GRPCClient>()
+  ::paddle::operators::distributed::RPCClient::GetInstance<RPCCLIENT_T>()
       ->SendComplete();
 }
 #endif
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 9dc39ad0ddf8c5de3e1960a1171431e026de35ae..ab1d2143330fb8cbfd535758a83bc71de939c4e0 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -184,6 +184,7 @@ else()
     set(DEPS_OPS ${DEPS_OPS} nccl_op)
 endif()
 
+set(DISTRIBUTE_DEPS "")
 if(WITH_DISTRIBUTE)
     add_subdirectory(distributed)
 
@@ -192,6 +193,25 @@ if(WITH_DISTRIBUTE)
         set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf)
     else()
         set(DISTRIBUTE_DEPS sendrecvop_brpc brpc leveldb snappystream snappy protobuf ssl crypto zlib)
+        if(WITH_BRPC_RDMA)
+            # RDMA builds additionally link the host's ibverbs and rdmacm libraries.
+            find_library(IBVERBS_LIBRARY NAMES ibverbs)
+            if(NOT IBVERBS_LIBRARY)
+                message(FATAL_ERROR "ibverbs library not found; WITH_BRPC_RDMA requires it.")
+            endif()
+            ADD_LIBRARY(ibverbs SHARED IMPORTED GLOBAL)
+            SET_PROPERTY(TARGET ibverbs PROPERTY IMPORTED_LOCATION "${IBVERBS_LIBRARY}")
+
+
+            find_library(RDMACM_LIBRARY NAMES rdmacm)
+            if(NOT RDMACM_LIBRARY)
+                message(FATAL_ERROR "rdmacm library not found; WITH_BRPC_RDMA requires it.")
+            endif()
+            ADD_LIBRARY(rdmacm SHARED IMPORTED GLOBAL)
+            SET_PROPERTY(TARGET rdmacm PROPERTY IMPORTED_LOCATION "${RDMACM_LIBRARY}")
+
+            set(DISTRIBUTE_DEPS ${DISTRIBUTE_DEPS} ibverbs rdmacm)
+        endif()
     endif()
 
     set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
@@ -205,7 +225,7 @@ if(WITH_DISTRIBUTE)
     #    listen_and_serv_op sum_op executor SERIAL)
     if(WITH_GPU)
         set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-        cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS listen_and_serv_op executor SERIAL)
+        cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS listen_and_serv_op ${DISTRIBUTE_DEPS} executor SERIAL)
         if(WITH_GRPC)
             op_library(gen_nccl_id_op DEPS nccl_common sendrecvop_grpc)
         else()
@@ -297,6 +317,7 @@ foreach(src ${DETECTION_LIBRARY})
 endforeach()
 
 set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
+set(GLOB_DISTRIBUTE_DEPS ${DISTRIBUTE_DEPS} CACHE INTERNAL "distributed dependency")
 
 cc_test(gather_test SRCS gather_test.cc DEPS tensor)
 cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
diff --git a/paddle/fluid/operators/detail/macros.h b/paddle/fluid/operators/detail/macros.h
index b9e385994efcea0388756e8bd780ebfc719ed08d..6f4a15caa5542a45cd8e26a72b055ca8948069d0 100644
--- a/paddle/fluid/operators/detail/macros.h
+++ b/paddle/fluid/operators/detail/macros.h
@@ -14,14 +14,25 @@
 
 #pragma once
 
+#ifdef PADDLE_WITH_DISTRIBUTE
+
+// RPCSERVER_T / RPCCLIENT_T name the concrete RPC server/client classes of the
+// framework selected at build time. They are fully qualified so they also
+// resolve when expanded outside namespace paddle::operators.
 #ifdef PADDLE_WITH_GRPC
+
 #include "paddle/fluid/operators/distributed/grpc_client.h"
 #include "paddle/fluid/operators/distributed/grpc_server.h"
-#define RPCSERVER_T distributed::AsyncGRPCServer
-#define RPCCLIENT_T distributed::GRPCClient
-#else
+#define RPCSERVER_T paddle::operators::distributed::AsyncGRPCServer
+#define RPCCLIENT_T paddle::operators::distributed::GRPCClient
+
+#else  // PADDLE_WITH_GRPC
+
 #include "paddle/fluid/operators/distributed/brpc_client.h"
 #include "paddle/fluid/operators/distributed/brpc_server.h"
-#define RPCSERVER_T distributed::AsyncBRPCServer
-#define RPCCLIENT_T distributed::BRPCClient
-#endif
+#define RPCSERVER_T paddle::operators::distributed::AsyncBRPCServer
+#define RPCCLIENT_T paddle::operators::distributed::BRPCClient
+
+#endif  // PADDLE_WITH_GRPC
+
+#endif  // PADDLE_WITH_DISTRIBUTE