rpc_server.h 3.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

X
Xin Pan 已提交
17
#include <atomic>
18 19 20 21 22
#include <condition_variable>  // NOLINT
#include <mutex>  // NOLINT
#include <set>
#include <string>
#include <thread>  // NOLINT
#include <unordered_map>
#include <utility>
#include <vector>
23

24
#include "paddle/fluid/operators/distributed/request_handler.h"
25

26 27 28
// Flags consumed by this header: the RPCServer constructor forwards
// FLAGS_rpc_server_profile_period and FLAGS_rpc_server_profile_path to its
// RPCServerProfiler member.
// NOTE(review): DECLARE_* looks like the gflags macro family, which only
// declares the flag; the matching DEFINE_* is expected in a .cc file — confirm.
DECLARE_int32(rpc_server_profile_period);
DECLARE_string(rpc_server_profile_path);

29 30
namespace paddle {
namespace operators {
31
namespace distributed {
32

33 34 35 36 37 38 39 40 41 42 43
// Helper owned by RPCServer that carries the profiling configuration
// (a period and a log path) together with a step counter.
//
// Only the declarations live in this header; the constructor and OneStep()
// are defined out of line.
// NOTE(review): presumably OneStep() advances step_ and switches profiling
// on/off based on profile_period_ — confirm against the implementation file.
class RPCServerProfiler {
 public:
  // profile_period:   stored in profile_period_ (immutable after construction).
  // profile_log_path: stored in profile_log_path_.
  RPCServerProfiler(int profile_period, const std::string& profile_log_path);

  // Called by the server to mark one step; takes no arguments and returns
  // nothing.
  void OneStep();

 private:
  // Fixed for the lifetime of the object. Being const, it also makes the
  // class non-copy-assignable.
  const int profile_period_;

  // Destination path handed in at construction.
  std::string profile_log_path_;

  // Step counter; its initial value is set by the out-of-line constructor.
  int step_;
};

44 45 46 47
class RPCServer {
 public:
  explicit RPCServer(const std::string& address, int client_num)
      : cur_cond_(0),
48 49
        profiler_(FLAGS_rpc_server_profile_period,
                  FLAGS_rpc_server_profile_path),
50 51 52
        bind_address_(address),
        exit_flag_(false),
        selected_port_(0),
Y
Yancey1989 已提交
53 54
        client_num_(client_num),
        need_reset_all_vars_(false) {}
55 56 57 58 59 60 61 62 63 64

  virtual ~RPCServer() {}
  virtual void StartServer() = 0;
  virtual void WaitServerReady() = 0;

  void ShutDown();

  bool IsExit() { return exit_flag_.load(); }

  int GetSelectedPort() const { return selected_port_; }
Y
Yancey1989 已提交
65

Y
Yancey1989 已提交
66
  int GetClientNum();
Y
Yancey1989 已提交
67

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
  void SavePort() const;

  // RegisterRPC, register the rpc method name to a handler
  // class, and auto generate a condition id for this call
  // to be used for the barrier.
  void RegisterRPC(const std::string& rpc_name, RequestHandler* handler,
                   int thread_num = 5);

  // Wait util all the clients have reached the barrier for one
  // rpc method. This function should be called in the
  // RequestHandler if you want to run the server/client in a
  // synchronous mode.
  void WaitBarrier(const std::string& rpc_name);

  void SetCond(const std::string& rpc_name);
  void WaitCond(const std::string& rpc_name);
  void IncreaseBatchBarrier(const std::string rpc_name);
Y
Yancey1989 已提交
85

Y
Yancey1989 已提交
86
  void Complete();
Y
Yancey1989 已提交
87

88
  void ResetBarrierCounter();
89
  RPCServerProfiler& Profiler() { return profiler_; }
90

Y
Yancey1989 已提交
91 92
  bool NeedResetAllVars();

93 94 95 96 97 98 99 100 101 102 103
 protected:
  virtual void ShutDownImpl() = 0;

 private:
  std::mutex mutex_;
  std::unordered_map<std::string, int> barrier_counter_;
  std::condition_variable barrier_cond_;

  std::unordered_map<std::string, int> rpc_cond_map_;
  std::atomic<int> cur_cond_;
  std::condition_variable rpc_cond_;
104
  RPCServerProfiler profiler_;
105 106 107 108 109

 protected:
  std::string bind_address_;
  std::atomic<int> exit_flag_;
  int selected_port_;
W
Wu Yi 已提交
110
  int client_num_;
Y
Yancey1989 已提交
111
  bool need_reset_all_vars_;
112 113 114 115 116 117

  std::unordered_map<std::string, RequestHandler*> rpc_call_map_;
  std::unordered_map<std::string, int> rpc_thread_num_;
  friend class RequestHandler;
};

118
};  // namespace distributed
119 120
};  // namespace operators
};  // namespace paddle