rpc_server.h 3.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

X
Xin Pan 已提交
17
#include <atomic>
18 19 20 21 22
#include <condition_variable>  // NOLINT
#include <cstdint>
#include <mutex>  // NOLINT
#include <set>
#include <sstream>
#include <string>
#include <thread>  // NOLINT
#include <unordered_map>
#include <utility>
#include <vector>
23

24
#include "paddle/fluid/framework/scope.h"
25
#include "paddle/fluid/operators/distributed/request_handler.h"
26
#include "paddle/fluid/platform/device_context.h"
27 28 29

namespace paddle {
namespace operators {
30
namespace distributed {
31

32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
// MonomerHandle groups the per-variable bookkeeping stored in
// RPCServer::var_map_: a variable name, an rpc name, a scope pointer, a
// device-context pointer and a barrier counter.
//
// The two pointers are held as plain, non-owning raw pointers: this struct
// has no destructor and never frees them.
// NOTE(review): presumably populated by RPCServer::RegisterVar, whose
// parameters mirror these fields -- confirm against the implementation.
struct MonomerHandle {
  std::string var_name_;
  std::string rpc_name_;
  framework::Scope* scope_{nullptr};
  platform::DeviceContext* dev_ctx_{nullptr};
  int64_t barrier_{0};

  // Returns a single-line, human-readable dump of every field.
  // scope_ and dev_ctx_ are streamed as pointers; they are not dereferenced,
  // so calling this on a default-constructed handle (null pointers) is safe.
  // Declared const because it only reads the members, which lets it be
  // called through a const reference.
  std::string String() const {
    std::ostringstream out;
    out << "var_name:" << var_name_ << ", rpc_name:" << rpc_name_
        << ", scope:" << scope_ << ", dev_ctx:" << dev_ctx_
        << ", barrier_:" << barrier_;
    return out.str();
  }
};

48 49 50 51 52 53 54
class RPCServer {
 public:
  explicit RPCServer(const std::string& address, int client_num)
      : cur_cond_(0),
        bind_address_(address),
        exit_flag_(false),
        selected_port_(0),
Y
Yancey1989 已提交
55 56
        client_num_(client_num),
        need_reset_all_vars_(false) {}
57 58 59 60 61 62 63 64 65 66

  virtual ~RPCServer() {}
  virtual void StartServer() = 0;
  virtual void WaitServerReady() = 0;

  void ShutDown();

  bool IsExit() { return exit_flag_.load(); }

  int GetSelectedPort() const { return selected_port_; }
Y
Yancey1989 已提交
67

Y
Yancey1989 已提交
68
  int GetClientNum();
Y
Yancey1989 已提交
69

70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
  void SavePort() const;

  // RegisterRPC, register the rpc method name to a handler
  // class, and auto generate a condition id for this call
  // to be used for the barrier.
  void RegisterRPC(const std::string& rpc_name, RequestHandler* handler,
                   int thread_num = 5);

  // Wait util all the clients have reached the barrier for one
  // rpc method. This function should be called in the
  // RequestHandler if you want to run the server/client in a
  // synchronous mode.
  void WaitBarrier(const std::string& rpc_name);

  void SetCond(const std::string& rpc_name);
  void WaitCond(const std::string& rpc_name);
  void IncreaseBatchBarrier(const std::string rpc_name);
Y
Yancey1989 已提交
87

88 89 90 91 92 93 94 95 96 97
  void RegisterVar(const std::string& var_name, const std::string& rpc_name,
                   framework::Scope* scope, platform::DeviceContext* dev_ctx);
  void IncreaseVarBarrier(const std::string& var_name);
  void WaitVarBarrier(const std::string& var_name);
  void SetVarCond(const std::string& var_name);
  void WaitVarCond(const std::string& var_name);
  void ClearRegisteredVars();
  void ClearVar(const std::string& var_name);
  MonomerHandle GetMonomer(const std::string& var_name);

Y
Yancey1989 已提交
98
  void Complete();
Y
Yancey1989 已提交
99

100 101
  void ResetBarrierCounter();

Y
Yancey1989 已提交
102 103
  bool NeedResetAllVars();

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
 protected:
  virtual void ShutDownImpl() = 0;

 private:
  std::mutex mutex_;
  std::unordered_map<std::string, int> barrier_counter_;
  std::condition_variable barrier_cond_;

  std::unordered_map<std::string, int> rpc_cond_map_;
  std::atomic<int> cur_cond_;
  std::condition_variable rpc_cond_;

 protected:
  std::string bind_address_;
  std::atomic<int> exit_flag_;
  int selected_port_;
W
Wu Yi 已提交
120
  int client_num_;
Y
Yancey1989 已提交
121
  bool need_reset_all_vars_;
122 123 124 125

  std::unordered_map<std::string, RequestHandler*> rpc_call_map_;
  std::unordered_map<std::string, int> rpc_thread_num_;
  friend class RequestHandler;
126 127 128

  // TODO(gongwb): use more cond to notify or wait;
  std::unordered_map<std::string, MonomerHandle> var_map_;
129 130
};

131
};  // namespace distributed
132 133
};  // namespace operators
};  // namespace paddle