rpc_server.h 4.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

X
Xin Pan 已提交
17
#include <atomic>
18 19 20
#include <set>
#include <string>
#include <thread>  // NOLINT
W
wanghuancoder 已提交
21
#include <unordered_map>
22 23
#include <utility>
#include <vector>
24

25
#include "paddle/fluid/framework/scope.h"
26
#include "paddle/fluid/operators/distributed/request_handler.h"
27
#include "paddle/fluid/platform/device_context.h"
28

W
wanghuancoder 已提交
29 30 31 32 33 34 35 36 37
// Forward declarations of the framework/platform types that this header
// refers to only through pointers (framework::Scope*,
// platform::DeviceContext*).
// NOTE(review): scope.h and device_context.h are also included above and
// presumably already declare these classes -- confirm before removing either
// the includes or these declarations.
namespace paddle {
namespace framework {
class Scope;
}  // namespace framework
namespace platform {
class DeviceContext;
}  // namespace platform
}  // namespace paddle

38 39
namespace paddle {
namespace operators {
40
namespace distributed {
41

W
wanghuancoder 已提交
42 43
// Forward declaration: this header only stores RequestHandler pointers and
// names the class as a friend, so the complete type is not required here.
// NOTE(review): request_handler.h is also included above -- presumably it
// provides the full definition; confirm.
class RequestHandler;

44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
// Plain record describing one variable tracked by the RPC server: its name,
// the rpc method name it is associated with, the scope / device context
// pointers supplied for it, and a barrier value.
//
// The pointers are raw; this struct has no destructor and never frees them.
// NOTE(review): barrier_ is presumably a per-variable barrier counter --
// confirm against the RPCServer implementation.
struct MonomerHandle {
  std::string var_name_;
  std::string rpc_name_;
  framework::Scope* scope_{nullptr};
  platform::DeviceContext* dev_ctx_{nullptr};
  int64_t barrier_{0};

  // Returns a single-line, human-readable dump of every field; the two
  // pointers are streamed as pointer values, not dereferenced.
  //
  // Declared const because it only reads the fields, so it can be called on
  // const handles and through const references.
  std::string String() const {
    std::stringstream ss;
    ss << "var_name:" << var_name_ << ", rpc_name:" << rpc_name_
       << ", scope:" << scope_ << ", dev_ctx:" << dev_ctx_
       << ", barrier_:" << barrier_;
    return ss.str();
  }
};

60 61 62 63 64 65 66
class RPCServer {
 public:
  explicit RPCServer(const std::string& address, int client_num)
      : cur_cond_(0),
        bind_address_(address),
        exit_flag_(false),
        selected_port_(0),
Y
Yancey1989 已提交
67 68
        client_num_(client_num),
        need_reset_all_vars_(false) {}
69 70 71 72 73 74 75 76 77 78

  virtual ~RPCServer() {}
  virtual void StartServer() = 0;
  virtual void WaitServerReady() = 0;

  void ShutDown();

  bool IsExit() { return exit_flag_.load(); }

  int GetSelectedPort() const { return selected_port_; }
Y
Yancey1989 已提交
79

Y
Yancey1989 已提交
80
  int GetClientNum();
Y
Yancey1989 已提交
81

82 83 84 85 86 87
  void SavePort() const;

  // RegisterRPC, register the rpc method name to a handler
  // class, and auto generate a condition id for this call
  // to be used for the barrier.
  void RegisterRPC(const std::string& rpc_name, RequestHandler* handler,
1
123malin 已提交
88
                   int thread_num = 1);
89

90 91 92 93
  int GetThreadNum(const std::string& rpc_name) {
    return rpc_thread_num_[rpc_name];
  }

94 95 96 97 98 99 100 101 102
  // Wait util all the clients have reached the barrier for one
  // rpc method. This function should be called in the
  // RequestHandler if you want to run the server/client in a
  // synchronous mode.
  void WaitBarrier(const std::string& rpc_name);

  void SetCond(const std::string& rpc_name);
  void WaitCond(const std::string& rpc_name);
  void IncreaseBatchBarrier(const std::string rpc_name);
Y
Yancey1989 已提交
103

104 105 106 107 108 109 110 111 112 113
  void RegisterVar(const std::string& var_name, const std::string& rpc_name,
                   framework::Scope* scope, platform::DeviceContext* dev_ctx);
  void IncreaseVarBarrier(const std::string& var_name);
  void WaitVarBarrier(const std::string& var_name);
  void SetVarCond(const std::string& var_name);
  void WaitVarCond(const std::string& var_name);
  void ClearRegisteredVars();
  void ClearVar(const std::string& var_name);
  MonomerHandle GetMonomer(const std::string& var_name);

Y
Yancey1989 已提交
114
  void Complete();
Y
Yancey1989 已提交
115

116 117
  void ResetBarrierCounter();

Y
Yancey1989 已提交
118 119
  bool NeedResetAllVars();

120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
 protected:
  virtual void ShutDownImpl() = 0;

 private:
  std::mutex mutex_;
  std::unordered_map<std::string, int> barrier_counter_;
  std::condition_variable barrier_cond_;

  std::unordered_map<std::string, int> rpc_cond_map_;
  std::atomic<int> cur_cond_;
  std::condition_variable rpc_cond_;

 protected:
  std::string bind_address_;
  std::atomic<int> exit_flag_;
  int selected_port_;
W
Wu Yi 已提交
136
  int client_num_;
Y
Yancey1989 已提交
137
  bool need_reset_all_vars_;
138 139 140 141

  std::unordered_map<std::string, RequestHandler*> rpc_call_map_;
  std::unordered_map<std::string, int> rpc_thread_num_;
  friend class RequestHandler;
142 143 144

  // TODO(gongwb): use more cond to notify or wait;
  std::unordered_map<std::string, MonomerHandle> var_map_;
145 146
};

147
};  // namespace distributed
148 149
};  // namespace operators
};  // namespace paddle