// interceptor.h
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <condition_variable>
#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/distributed/fleet_executor/interceptor_message.pb.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/macros.h"

namespace paddle {
namespace distributed {

class TaskNode;

class Interceptor {
36
 public:
37
  using MsgHandle = std::function<void(const InterceptorMessage&)>;
38

39 40 41
 public:
  Interceptor() = delete;

42
  Interceptor(int64_t interceptor_id, TaskNode* node);
43

44
  virtual ~Interceptor();
45

46 47
  void Join();

48
  // register interceptor handle
49
  void RegisterMsgHandle(MsgHandle handle);
50

51 52
  virtual void HandleStop(const InterceptorMessage& msg);

53 54
  void Handle(const InterceptorMessage& msg);

55 56 57
  // return the interceptor id
  int64_t GetInterceptorId() const;

58 59 60
  // return the conditional var
  std::condition_variable& GetCondVar();

61 62 63 64
  // Called by Carrier, enqueue an InterceptorMessage to remote mailbox
  bool EnqueueRemoteInterceptorMessage(
      const InterceptorMessage& interceptor_message);

65
  bool Send(int64_t dst_id, InterceptorMessage& msg);  // NOLINT
66

67 68
  DISABLE_COPY_AND_ASSIGN(Interceptor);

69 70
 protected:
  TaskNode* GetTaskNode() const { return node_; }
71
  bool stop_{false};
72

73 74 75 76 77 78 79 80 81 82 83 84 85 86
 private:
  // pool the local mailbox, parse the Message
  void PoolTheMailbox();

  // fetch all Message from remote mailbox to local mailbox
  // return true if remote mailbox not empty, otherwise return false
  bool FetchRemoteMailbox();

  // interceptor id, handed from above layer
  int64_t interceptor_id_;

  // node need to be handled by this interceptor
  TaskNode* node_;

87
  // interceptor handle which process message
88
  MsgHandle handle_{nullptr};
89

90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
  // mutex to control read/write conflict for remote mailbox
  std::mutex remote_mailbox_mutex_;

  // interceptor runs PoolTheMailbox() function to poll local mailbox
  std::thread interceptor_thread_;

  // conditional variable for blocking the thread when
  // fetch an empty remote mailbox
  std::condition_variable cond_var_;

  // remote mailbox, written by EnqueueRemoteMessage()
  // read by FetchRemoteMailbox()
  std::queue<InterceptorMessage> remote_mailbox_;

  // local mailbox, written by FetchRemoteMailbox()
  // read by PoolTheMailbox()
  std::queue<InterceptorMessage> local_mailbox_;
107 108 109

  int64_t already_run_times_{0};
  int64_t used_slot_nums_{0};
110 111
};

// Static-only interface for registering interceptor creators by type name
// and for creating interceptors through them.
class InterceptorFactory {
 public:
  // Pointer to a creator function: takes an interceptor id and a task node
  // and returns an owning pointer to the new interceptor.
  using CreateInterceptorFunc =
      std::unique_ptr<Interceptor> (*)(int64_t /*id*/, TaskNode* /*node*/);

  // Table type keyed by interceptor type name.
  using CreateInterceptorMap =
      std::unordered_map<std::string, CreateInterceptorFunc>;

  // Registers `func` under the name `type`.
  // NOTE(review): handling of duplicate names is defined in the
  // implementation file.
  static void Register(const std::string& type, CreateInterceptorFunc func);

  // Creates an interceptor of the given `type` from `id` and `node`.
  // NOTE(review): handling of unknown type names is defined in the
  // implementation file.
  static std::unique_ptr<Interceptor> Create(const std::string& type,
                                             int64_t id, TaskNode* node);
};

// Generic creator matching InterceptorFactory::CreateInterceptorFunc:
// constructs an InterceptorClass from (id, node) and returns it as an owning
// pointer to the Interceptor base.
template <typename InterceptorClass>
std::unique_ptr<Interceptor> CreatorInterceptor(int64_t id, TaskNode* node) {
  std::unique_ptr<Interceptor> created =
      std::make_unique<InterceptorClass>(id, node);
  return created;
}

// Registers `interceptor_class` with InterceptorFactory under the name
// #interceptor_type.
//
// The expansion defines:
//   * a registrar class whose constructor calls InterceptorFactory::Register
//     with CreatorInterceptor<interceptor_class>;
//   * a global instance g_register_<interceptor_type> of that class;
//   * a function TouchRegisterInterceptor_<interceptor_type>() that calls
//     Touch() on the global and returns 0. USE_INTERCEPTOR declares and calls
//     this function.
//
// The registrar class name deliberately contains no double underscore:
// identifiers containing "__" are reserved for the implementation.
#define REGISTER_INTERCEPTOR(interceptor_type, interceptor_class)           \
  class InterceptorRegistrar_##interceptor_type {                           \
   public:                                                                  \
    InterceptorRegistrar_##interceptor_type() {                             \
      InterceptorFactory::Register(#interceptor_type,                       \
                                   CreatorInterceptor<interceptor_class>);  \
    }                                                                       \
    void Touch() {}                                                         \
  };                                                                        \
  InterceptorRegistrar_##interceptor_type g_register_##interceptor_type;    \
  int TouchRegisterInterceptor_##interceptor_type() {                       \
    g_register_##interceptor_type.Touch();                                  \
    return 0;                                                               \
  }

// Declares TouchRegisterInterceptor_<interceptor_type>() (the function that
// REGISTER_INTERCEPTOR defines) and calls it to initialise a static int named
// use_interceptor_<interceptor_type>, so the using translation unit
// references the one that performed the registration.
// NOTE(review): presumably this keeps the registering object file from being
// dropped at link time -- confirm against the build setup.
#define USE_INTERCEPTOR(interceptor_type)                \
  int TouchRegisterInterceptor_##interceptor_type();     \
  UNUSED static int use_interceptor_##interceptor_type = \
      TouchRegisterInterceptor_##interceptor_type();
}  // namespace distributed
}  // namespace paddle