/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "SocketChannel.h"

#include <atomic>
#include <memory>
#include <thread>
#include <vector>

#include "paddle/utils/Thread.h"

struct sxi_socket;

namespace paddle {

class SocketWorker;

/**
 * @brief class for holding all parameters processing for current port
 *
 * @note  each parameter server inherits from one socket server, each
 *        server contains serveral woker threads which are to parallelize
 *        the processing of computation, but share some common datas stored
 *        in child class of socketserver.
 */
class SocketServer : public Thread {
41 42 43
  // rdmaCpu controls the cpu affinity of RDMA server daemon,
  // which could benifit performance. rdmaCpu = -1 means TCP
  // is used instead of RDMA transport.
W
Wu Yi 已提交
44
 public:
Z
zhangjinchao01 已提交
45 46 47 48 49 50 51 52
  SocketServer(const std::string& addr, int port, int rdmaCpu);
  ~SocketServer();

  virtual void run();

  typedef std::function<void(const std::vector<iovec>& outputIovs)>
      ResponseCallback;

W
Wu Yi 已提交
53
 protected:
Z
zhangjinchao01 已提交
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
  //
  // The derived class needs to implement this function
  // to handle the request received by SocketWorker
  // The request is encapsulated by MsgReader, which contains
  // a set of blocks.
  virtual void handleRequest(std::unique_ptr<MsgReader> msgReader,
                             ResponseCallback callback) = 0;

  std::unique_ptr<SocketChannel> createChannel(int sock,
                                               const std::string& peerName) {
    return std::unique_ptr<SocketChannel>(new SocketChannel(sock, peerName));
  }
  std::unique_ptr<SocketChannel> createChannel(struct sxi_sock* sock,
                                               const std::string& peerName) {
    return std::unique_ptr<SocketChannel>(new SocketChannel(sock, peerName));
  }

  friend class SocketWorker;

W
Wu Yi 已提交
73
 private:
Z
zhangjinchao01 已提交
74 75 76 77 78
  void rdmaServer();
  void tcpServer();

  void detach() {}  // detach accept thread is forbidden

W
Wu Yi 已提交
79
 protected:
Z
zhangjinchao01 已提交
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
  enum ChannelType tcpRdma_;
  // for rdma
  int rdmaCpu_;
  std::string rdmaUri_;
  sxi_socket* rdmaSocket_;
  // for tcp
  int port_;
  std::string addr_;
  int socket_;
  int maxPendingConnections_;
  bool stopping_;
};

/**
 * @brief class for holding one connection from one trainer
 *
 * @note  all parameter processing will run in the context of this worker
 */
class SocketWorker : public Thread {
W
Wu Yi 已提交
99
 public:
Z
zhangjinchao01 已提交
100 101 102 103 104 105 106
  SocketWorker(std::unique_ptr<SocketChannel>&& channel, SocketServer* server)
      : channel_(std::move(channel)), server_(server) {}

  virtual ~SocketWorker() {}

  virtual void run();

W
Wu Yi 已提交
107
 protected:
Z
zhangjinchao01 已提交
108 109 110 111 112 113 114 115 116 117 118 119 120
  std::unique_ptr<SocketChannel> channel_;
  SocketServer* server_;
  enum ChannelType tcpRdma_;
};

/**
 * @brief Provides the RDMA client daemon threads.
 *
 * @note  The daemons are required by the sock-like RDMA library. A
 *        singleton model is used for the daemons. Each daemon is hosted on
 *        a single cpu core for better load-balance performance.
 */
class RdmaClientDaemons {
 private:
  RdmaClientDaemons();

  static std::unique_ptr<RdmaClientDaemons> daemons_;

 public:
  /// Returns the singleton, creating it on the first call via std::call_once.
  static RdmaClientDaemons* get() {
    std::call_once(RdmaClientDaemons::initDataFlag_,
                   &RdmaClientDaemons::getInstance);

    return daemons_.get();
  }

  /**
   * Picks the next daemon socket in round-robin order.
   *
   * The index is advanced with a compare-exchange loop so that the
   * read-modify-write of curCpu_ is one atomic step: concurrent callers each
   * obtain a distinct consecutive index and no increment is lost. A separate
   * load followed by a store would not give that guarantee even though
   * curCpu_ is atomic.
   *
   * NOTE(review): assumes onlineCpus_ > 0 and rdmaClientSocket_ holds at
   * least onlineCpus_ entries; both are set outside this header - confirm.
   */
  struct sxi_socket* selectDaemon() {
    int cpu = curCpu_.load();
    // On failure compare_exchange_weak reloads `cpu` with the current value,
    // so the successor is recomputed from fresh data on every retry.
    while (!curCpu_.compare_exchange_weak(cpu, (cpu + 1) % onlineCpus_)) {
    }

    LOG(INFO) << "select daemon " << cpu << " onlineCpus_ " << onlineCpus_;
    return rdmaClientSocket_[cpu];
  }

  ~RdmaClientDaemons();

 public:
  friend class SocketClient;

 private:
  static std::once_flag initDataFlag_;
  /// Creates the singleton if it does not exist yet; invoked through get().
  static void getInstance() {
    if (!daemons_.get()) daemons_.reset(new RdmaClientDaemons());
  }

  std::vector<struct sxi_socket*> rdmaClientSocket_;
  std::atomic<int> curCpu_;
  int onlineCpus_;
};

/**
 * @brief management for client connection which are from trainers
 *
 * @note  it contains one channel descriptor which used to write and
 *        read data
 */
class SocketClient {
W
Wu Yi 已提交
165
 public:
166 167
  SocketClient(const std::string& serverAddr,
               int serverPort,
Z
zhangjinchao01 已提交
168 169 170 171
               enum ChannelType channelType);

  SocketChannel* getChannel() { return channel_.get(); }

W
Wu Yi 已提交
172
 protected:
Z
zhangjinchao01 已提交
173 174 175 176
  std::unique_ptr<SocketChannel> channel_;
  struct sxi_socket* socketDaemon_;
  enum ChannelType tcpRdma_;

W
Wu Yi 已提交
177
 private:
Z
zhangjinchao01 已提交
178 179 180 181 182 183 184 185
  void RdmaClient(const std::string& serverAddr, int serverPort);
  void TcpClient(const std::string& serverAddr, int serverPort);
};

// Declaration only; the definition is not part of this header.
// NOTE(review): presumably returns the IP address associated with the named
// network device - confirm against the implementation. `device` is taken by
// non-const reference, so the callee is able to modify it.
std::string getIpAddr(std::string& device);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably configures options on the socket descriptor
// `sockfd` - confirm against the implementation.
void setOption(int sockfd);

}  // namespace paddle