interceptor.h 5.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <condition_variable>
#include <cstdint>
#include <deque>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/distributed/fleet_executor/interceptor_message.pb.h"
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"
31 32

namespace paddle {
33 34
namespace framework {
class Scope;
35
class GarbageCollector;
36
}  // namespace framework
37 38 39
namespace distributed {

class TaskNode;
40
class Carrier;
41
class TaskLoop;
42

43 44 45
// Reserved negative ids, fixed at compile time.
// NOTE(review): presumably these identify the pipeline's source and sink
// endpoints -- confirm against the code that compares ids with them.
constexpr int64_t SOURCE_ID{-1};
constexpr int64_t SINK_ID{-2};

46
class Interceptor {
47
 public:
48
  using MsgHandle = std::function<void(const InterceptorMessage&)>;
49

50 51 52
 public:
  Interceptor() = delete;

53
  Interceptor(int64_t interceptor_id, TaskNode* node);
54

55
  virtual ~Interceptor();
56

57
  // register interceptor handle
58
  void RegisterMsgHandle(MsgHandle handle);
59 60 61

  void Handle(const InterceptorMessage& msg);

62
  // return the interceptor id
63
  int64_t GetInterceptorId() const { return interceptor_id_; }
64 65

  // Called by Carrier, enqueue an InterceptorMessage to remote mailbox
66
  void EnqueueRemoteInterceptorMessage(
67 68
      const InterceptorMessage& interceptor_message);

69
  bool Send(int64_t dst_id, InterceptorMessage& msg);  // NOLINT
70

71 72 73 74 75 76 77
  void SetPlace(const platform::Place& place) { place_ = place; }

  void SetRootScope(framework::Scope* scope) { root_scope_ = scope; }
  void SetMiniBatchScope(framework::Scope* scope) { minibatch_scope_ = scope; }
  void SetMicroBatchScope(const std::vector<framework::Scope*>& scopes) {
    microbatch_scopes_ = scopes;
  }
78 79 80
  void SetGC(const std::shared_ptr<framework::GarbageCollector>& gc) {
    gc_ = gc;
  }
81
  void RegisterCarrier(Carrier* carrier) { carrier_ = carrier; }
82
  void RegisterTaskLoop(TaskLoop* loop) { loop_ = loop; }
83 84 85

  TaskNode* GetTaskNode() const { return node_; }

86 87
  DISABLE_COPY_AND_ASSIGN(Interceptor);

88
 protected:
89 90 91 92 93 94 95
  // interceptor id, handed from above layer
  int64_t interceptor_id_;

  // node need to be handled by this interceptor
  TaskNode* node_;

  // for stop
96
  bool stop_{false};
97
  void StopCarrier();
98

99 100 101 102 103
  // for runtime
  platform::Place place_;
  framework::Scope* root_scope_{nullptr};
  framework::Scope* minibatch_scope_{nullptr};
  std::vector<framework::Scope*> microbatch_scopes_{};
104
  std::shared_ptr<framework::GarbageCollector> gc_{nullptr};
105

106
  Carrier* carrier_;
107
  TaskLoop* loop_;
108

109
 private:
110
  void LoopOnce();
111

112
  // interceptor handle which process message
113
  MsgHandle handle_{nullptr};
114

115 116
  std::mutex mutex_;
  std::deque<InterceptorMessage> messages_;
117 118 119

  int64_t already_run_times_{0};
  int64_t used_slot_nums_{0};
120 121
};

122 123 124 125 126 127 128 129 130 131 132 133 134
// Static-only interface for registering and creating interceptors by a string
// type name. Both member functions are defined out of line.
class InterceptorFactory {
 public:
  // Pointer to a creator function: takes (int64_t, TaskNode*) and returns an
  // owning pointer to the created Interceptor.
  using CreateInterceptorFunc =
      std::unique_ptr<Interceptor> (*)(int64_t, TaskNode*);

  // Type name -> creator function.
  using CreateInterceptorMap =
      std::unordered_map<std::string, CreateInterceptorFunc>;

  // NOTE(review): presumably records func under type so Create can find it
  // later -- confirm in the implementation file.
  static void Register(const std::string& type, CreateInterceptorFunc func);

  // NOTE(review): presumably calls the creator registered for type with
  // (id, node); behavior for an unknown type is not visible here.
  static std::unique_ptr<Interceptor> Create(const std::string& type,
                                             int64_t id,
                                             TaskNode* node);
};

135 136 137 138 139
// Constructs an InterceptorClass from (id, node) and hands it back through an
// owning pointer to the Interceptor base. The signature matches
// InterceptorFactory::CreateInterceptorFunc, so an instantiation can be
// passed to InterceptorFactory::Register.
template <typename InterceptorClass>
std::unique_ptr<Interceptor> CreatorInterceptor(int64_t id, TaskNode* node) {
  std::unique_ptr<Interceptor> instance =
      std::make_unique<InterceptorClass>(id, node);
  return instance;
}

140 141 142 143 144
// Registers interceptor_class with InterceptorFactory under the stringized
// name interceptor_type. The expansion defines:
//   * a helper class whose constructor calls InterceptorFactory::Register
//     with CreatorInterceptor<interceptor_class>;
//   * a global instance g_register_<type> of that helper class, so the
//     registration runs when the global is initialized;
//   * int TouchRegisterInterceptor_<type>(), which calls Touch() on the
//     global and returns 0. USE_INTERCEPTOR declares and calls it.
// Every continued line must end in a backslash; any line without one
// terminates the macro early.
// NOTE(review): identifiers beginning with a double underscore are reserved
// for the implementation; the helper class name is kept unchanged here.
#define REGISTER_INTERCEPTOR(interceptor_type, interceptor_class)          \
  class __RegisterInterceptor_##interceptor_type {                         \
   public:                                                                 \
    __RegisterInterceptor_##interceptor_type() {                           \
      InterceptorFactory::Register(#interceptor_type,                      \
                                   CreatorInterceptor<interceptor_class>); \
    }                                                                      \
    void Touch() {}                                                        \
  };                                                                       \
  __RegisterInterceptor_##interceptor_type g_register_##interceptor_type;  \
  int TouchRegisterInterceptor_##interceptor_type() {                      \
    g_register_##interceptor_type.Touch();                                 \
    return 0;                                                              \
  }

// Declares TouchRegisterInterceptor_<type>() (the function that
// REGISTER_INTERCEPTOR defines) and calls it to initialize a file-local
// static int, which creates a reference to the registering translation unit.
// NOTE(review): presumably this keeps the linker from dropping that
// translation unit -- confirm against the build setup.
// The expansion already ends with a semicolon.
#define USE_INTERCEPTOR(interceptor_type)                   \
  extern int TouchRegisterInterceptor_##interceptor_type(); \
  UNUSED static int use_interceptor_##interceptor_type =    \
      TouchRegisterInterceptor_##interceptor_type();
159

160 161
}  // namespace distributed
}  // namespace paddle