Patch summary:
  * Dockerfile: add net-tools to the apt-get install list.
  * paddle/pserver/LightNetwork.cpp: SocketClient::TcpClient now treats
    connect() as failed only when it returns non-zero, decides whether to
    retry from the errno value saved right after the failure, and aborts
    on every failure other than ECONNREFUSED.

diff --git a/Dockerfile b/Dockerfile
index b6f99ca539d077164c71d797a5ccda7b1b5c44ba..39af60966b6cab7d8b9e644f4ea658613f8ba518 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,7 +30,8 @@ RUN apt-get update && \
     python-numpy python-matplotlib gcc g++ \
     automake locales clang-format-3.8 swig doxygen cmake \
     liblapack-dev liblapacke-dev libboost-dev \
-    clang-3.8 llvm-3.8 libclang-3.8-dev && \
+    clang-3.8 llvm-3.8 libclang-3.8-dev \
+    net-tools && \
     apt-get clean -y
 
 # Install Go
diff --git a/paddle/pserver/LightNetwork.cpp b/paddle/pserver/LightNetwork.cpp
index 8c8ba0a2e51b85bde0544c6780b07130336a6bdd..922f25734dee0a6db7fbcfcef3d29d2bad5b7858 100644
--- a/paddle/pserver/LightNetwork.cpp
+++ b/paddle/pserver/LightNetwork.cpp
@@ -383,20 +383,29 @@ void SocketClient::TcpClient(const std::string &serverAddr, int serverPort) {
   setOption(sockfd);
 
   /// Now connect to the server
-  int retry_second = 0;
-  int error = 0;
+  int retry_count = 0;
+  /// errno captured right after a failed connect(); later calls (logging,
+  /// sleeping) may overwrite the global errno before the loop condition runs.
+  int connect_errno = 0;
   do {
-    error = connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr));
-    if (error == ECONNREFUSED) {
+    if (connect(sockfd, (sockaddr *)&serv_addr, sizeof(serv_addr)) == 0) {
+      break;
+    }
+    connect_errno = errno;
+
+    if (connect_errno == ECONNREFUSED) {
       LOG(WARNING) << "connection refused by pserver, try again!";
-      if (retry_second++ >= 7) {
+      if (retry_count++ >= 7) {
         LOG(FATAL) << "connection refused by pserver, maybe pserver failed!";
       }
       std::this_thread::sleep_for(std::chrono::seconds(1));
     } else {
-      PCHECK(error >= 0) << "ERROR connecting to " << serverAddr;
+      /// connect() failed with a non-retryable error, so the check must
+      /// fail unconditionally here
+      PCHECK(false) << "ERROR connecting to " << serverAddr << ":"
+                    << serverPort << " errno: " << connect_errno;
     }
-  } while (error == ECONNREFUSED);
+  } while (connect_errno == ECONNREFUSED);
 
   channel_.reset(new SocketChannel(sockfd, serverAddr));
   tcpRdma_ = F_TCP;