// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/distributed/fleet_executor/carrier.h"

#include <algorithm>

#include "paddle/fluid/distributed/fleet_executor/global.h"
#include "paddle/fluid/distributed/fleet_executor/interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/message_bus.h"
#include "paddle/fluid/distributed/fleet_executor/runtime_graph.h"
#include "paddle/fluid/distributed/fleet_executor/task_node.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable_helper.h"

namespace paddle {
namespace distributed {

USE_INTERCEPTOR(Source);
USE_INTERCEPTOR(Compute);
USE_INTERCEPTOR(Amplifier);
USE_INTERCEPTOR(Sink);

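// Lightweight initialization: only records the interceptor-to-rank mapping and
// starts the task-loop thread pool; no scopes or interceptors are created.
// See the overload below for the full setup.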
void Carrier::Init(
    int64_t rank,
    const std::unordered_map<int64_t, int64_t>& interceptor_id_to_rank) {
  rank_ = rank;
  interceptor_id_to_rank_ = interceptor_id_to_rank;

  // TODO(fleet_exe dev): thread pool
  thread_num_ = 1;
  thread_pool_.SetThreadNum(thread_num_);
  thread_pool_.Start();
}

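// Full initialization: records the rank and the interceptor topology, binds
// the device context for `place`, builds one minibatch scope plus one scope
// per micro batch, initializes variables via CopyParameters, starts the
// task-loop thread pool, and creates the interceptors.
//
// Illustrative call sequence (a sketch, not taken from this file):
//   carrier->Init(rank, id_to_rank, id_to_node, program, scope,
//                 /*num_micro_batches=*/2, place, /*inference_vars=*/{});
//   carrier->Start();
//   carrier->Release();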
void Carrier::Init(
    int64_t rank,
    const std::unordered_map<int64_t, int64_t>& interceptor_id_to_rank,
    const std::unordered_map<int64_t, TaskNode*>& interceptor_id_to_node,
    const framework::ProgramDesc& program,
    framework::Scope* scope,
    int64_t num_micro_batches,
    const platform::Place& place,
    const std::vector<std::string>& inference_root_scope_vars) {
  rank_ = rank;
  interceptor_id_to_rank_ = interceptor_id_to_rank;
  interceptor_id_to_node_ = interceptor_id_to_node;
  place_ = place;
  root_scope_ = scope;
  dev_ctx_ = platform::DeviceContextPool::Instance().Get(place_);

  PADDLE_ENFORCE_NOT_NULL(
      root_scope_,
      platform::errors::InvalidArgument("root_scope cannot be nullptr"));
  minibatch_scope_ = &root_scope_->NewScope();
  microbatch_scopes_.resize(num_micro_batches);
  for (int i = 0; i < num_micro_batches; ++i) {
    microbatch_scopes_[i] = &minibatch_scope_->NewScope();
    CopyParameters(i, program, inference_root_scope_vars);
  }

  // TODO(fleet_exe dev): thread pool
  thread_num_ = 1;
  thread_pool_.SetThreadNum(thread_num_);
  thread_pool_.Start();

  CreateInterceptors();
  is_init_ = true;
}

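// Releases the scopes created by this carrier: dropping the root scope's kid
// scopes also removes the minibatch scope and its microbatch sub-scopes.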
void Carrier::Release() {
  if (root_scope_) {
    root_scope_->DropKids();
  }
}

Carrier::~Carrier() { VLOG(3) << "Carrier's destructor."; }

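// Despite its name, this creates and initializes variables rather than copying
// tensor data. On micro batch 0, persistable variables (and any variable named
// in inference_root_scope_vars) are created in the root scope; non-persistable
// variables are created in the scope of the given micro batch.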
void Carrier::CopyParameters(
    int microbatch_id,
    const framework::ProgramDesc& program,
    const std::vector<std::string>& inference_root_scope_vars) {
  auto& global_block = program.Block(0);

  std::map<std::string, int> inference_root_scope_var_map;
  for (auto var_name : inference_root_scope_vars) {
    inference_root_scope_var_map.insert({var_name, 1});
  }
  for (auto& var : global_block.AllVars()) {
    std::string var_name = var->Name();
    bool force_root = inference_root_scope_var_map.find(var_name) !=
                      inference_root_scope_var_map.end();
    if (force_root) {
      VLOG(4) << var_name << " will be forced to be created in the root scope.";
    }
    if ((var->Persistable() || force_root) && microbatch_id == 0) {
      auto* ptr = root_scope_->Var(var->Name());
      InitializeVariable(ptr, var->GetType());
      VLOG(5) << "Create persistable var: " << var->Name()
              << ", which pointer is " << ptr;
    } else if (!var->Persistable()) {
      auto* ptr = microbatch_scopes_[microbatch_id]->Var(var->Name());
      VLOG(5) << "Create variable " << var->Name() << " for microbatch "
              << microbatch_id << ", which pointer is " << ptr << ".";
      InitializeVariable(ptr, var->GetType());
    }
  }
}

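// Delivers a non-control message directly to an interceptor owned by this
// carrier, without going through the message bus.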
bool Carrier::EnqueueInterceptorMessage(
    const InterceptorMessage& interceptor_message) {
  PADDLE_ENFORCE_EQ(
      interceptor_message.ctrl_message(),
      false,
      platform::errors::Fatal(
          "Control messages should only be sent across ranks using the "
          "message bus."));
  int64_t dst_id = interceptor_message.dst_id();
  Interceptor* dst_interceptor = GetInterceptor(dst_id);
  dst_interceptor->EnqueueRemoteInterceptorMessage(interceptor_message);
  return true;
}

Interceptor* Carrier::GetInterceptor(int64_t interceptor_id) {
  auto iter = interceptor_idx_to_interceptor_.find(interceptor_id);
  PADDLE_ENFORCE_NE(iter,
                    interceptor_idx_to_interceptor_.end(),
                    platform::errors::InvalidArgument(
                        "Cannot find interceptor instance for interceptor "
                        "id %lld. Wrong dst? Call before init?",
                        interceptor_id));
  return iter->second.get();
}

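// Blocks the calling thread until WakeUp() notifies cond_var_, typically when
// the current run has finished (see Start()).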
void Carrier::Wait() {
  std::unique_lock<std::mutex> lock(running_mutex_);
  cond_var_.wait(lock);
}

void Carrier::WakeUp() {
  // probably double notify, but ok for ut
  cond_var_.notify_all();
}

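// Runs one step: sends DATA_IS_READY to every source interceptor, blocks until
// the run is signalled as finished, synchronizes the device, and then drops
// the temporary kid scopes of each microbatch scope.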
void Carrier::Start() {
  PADDLE_ENFORCE_EQ(is_init_,
                    true,
                    platform::errors::PreconditionNotMet(
                        "Using carrier before initialized."));
  for (int64_t id : source_interceptor_ids_) {
    VLOG(3) << "Carrier Start is sending start to source interceptor " << id
            << ".";
    InterceptorMessage start_msg;
    // source node data_is_ready is sent by carrier, so set src_id=-1
    start_msg.set_src_id(-1);
    start_msg.set_dst_id(id);
    start_msg.set_message_type(DATA_IS_READY);
    Send(start_msg);
  }
  // TODO(wangxi): async step
  Wait();
  dev_ctx_->Wait();
  for (auto* micro_scope : microbatch_scopes_) {
    // By default, we should delete all kid scopes after running the executor,
    // because some operators (such as while_op) may create local scopes when
    // running. But when while_op also creates a local executor to run its sub
    // block, the sub scopes it creates should not be dropped immediately,
    // because while_grad_op will use some variables created during the
    // while_op run, so we need to keep the kids and wait for the outer
    // executor to drop them.
    micro_scope->DropKids();
  }
}

bool Carrier::IsInit() const { return is_init_; }

int64_t Carrier::GetRank(int64_t interceptor_id) const {
  PADDLE_ENFORCE_NE(
      interceptor_id_to_rank_.find(interceptor_id),
      interceptor_id_to_rank_.end(),
      platform::errors::NotFound("Cannot find rank for interceptor id %lld.",
                                 interceptor_id));
  return interceptor_id_to_rank_.at(interceptor_id);
}

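// Routes a message by rank: if source and destination live on this carrier's
// rank the message is enqueued locally, otherwise it is forwarded through the
// global MessageBus.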
bool Carrier::Send(const InterceptorMessage& msg) {
  int64_t src_id = msg.src_id();
  // TODO(liyurui): compatible solution, will be removed completely in the
  // future
  if (interceptor_id_to_rank_.find(src_id) == interceptor_id_to_rank_.end() &&
      src_id == SOURCE_ID) {
    src_id = msg.dst_id();
  }
  int64_t dst_id = msg.dst_id();
  int64_t src_rank = GetRank(src_id);
  int64_t dst_rank = GetRank(dst_id);
  PADDLE_ENFORCE_EQ(
      src_rank,
      rank_,
      platform::errors::Fatal("The source rank id is %lld, which is not equal "
                              "to the carrier rank id %lld.",
                              src_rank,
                              rank_));
  if (src_rank == dst_rank) {
    VLOG(3) << "Send a message from interceptor " << src_id
            << " to interceptor " << dst_id << ", which are in the same rank.";
    return EnqueueInterceptorMessage(msg);
  } else {
    VLOG(3) << "Send a message from interceptor " << src_id
            << " to interceptor " << dst_id
            << ", which are in different ranks.";
    return GlobalVal<MessageBus>::Get()->Send(dst_rank, msg);
  }
}

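// Registers an interceptor under a unique id, binds it to this carrier and to
// a task loop chosen by interceptor_id % thread_num_, and takes ownership of
// the interceptor.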
Interceptor* Carrier::SetInterceptor(int64_t interceptor_id,
                                     std::unique_ptr<Interceptor> interceptor) {
  auto iter = interceptor_idx_to_interceptor_.find(interceptor_id);
  PADDLE_ENFORCE_EQ(iter,
                    interceptor_idx_to_interceptor_.end(),
                    platform::errors::AlreadyExists(
                        "The interceptor id %lld has already been created! "
                        "The interceptor id should be unique.",
                        interceptor_id));
  interceptor->RegisterCarrier(this);

  // TODO(fleet_exe dev): get loop
  auto* loop = thread_pool_.GetLoop(interceptor_id % thread_num_);
  PADDLE_ENFORCE_NOT_NULL(
      loop, platform::errors::Fatal("thread task loop must not be null"));
  interceptor->RegisterTaskLoop(loop);

  auto* ptr = interceptor.get();
  interceptor_idx_to_interceptor_.insert(
      std::make_pair(interceptor_id, std::move(interceptor)));
  return ptr;
}

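// Creates a garbage collector for the given place. A collector is only built
// for GPU places when eager deletion is enabled (threshold >= 0) and fast
// eager deletion mode is on; otherwise a null pointer is returned.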
static std::shared_ptr<framework::GarbageCollector> GetGC(
    const platform::Place& place) {
  int64_t max_memory_size = framework::GetEagerDeletionThreshold();
  std::shared_ptr<framework::GarbageCollector> gc;
  if (max_memory_size >= 0) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
    if (platform::is_gpu_place(place)) {
      if (framework::IsFastEagerDeletionModeEnabled()) {
        gc.reset(new framework::UnsafeFastGPUGarbageCollector(place,
                                                              max_memory_size));
      }
    }
#endif
  }  // max_memory_size >= 0

  return gc;
}

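// Builds one interceptor per task node: the node's type selects the
// interceptor class, the place, scopes and garbage collector are wired in, and
// task nodes without upstream dependencies are recorded as source
// interceptors.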
void Carrier::CreateInterceptors() {
  if (interceptor_id_to_node_.empty()) return;

  auto gc = GetGC(place_);

  // create each Interceptor
  // no auto init since there is no config
  for (const auto& item : interceptor_id_to_node_) {
    int64_t interceptor_id = item.first;
    TaskNode* task_node = item.second;

    PADDLE_ENFORCE_LT(
        task_node->run_at_offset(),
        task_node->run_per_steps(),
        platform::errors::InvalidArgument(
            "Interceptor's run_at_offset must be < run_per_steps, but now "
            "run_at_offset=%ld run_per_steps=%ld",
            task_node->run_at_offset(),
            task_node->run_per_steps()));

    std::unique_ptr<Interceptor> interceptor;
    PADDLE_ENFORCE_NE(task_node->type().empty(),
                      true,
                      platform::errors::NotFound(
                          "Cannot find type for task node with id %lld",
                          task_node->task_id()));
    interceptor = InterceptorFactory::Create(
        task_node->type(), interceptor_id, task_node);
    interceptor->SetPlace(place_);
    interceptor->SetMiniBatchScope(minibatch_scope_);
    interceptor->SetMicroBatchScope(microbatch_scopes_);
    interceptor->SetRootScope(root_scope_);
    interceptor->SetGC(gc);

    SetInterceptor(interceptor_id, std::move(interceptor));
    VLOG(3) << "Create Interceptor with interceptor id: " << interceptor_id
            << " with type: " << task_node->type() << ".";

    if (task_node->upstream().empty()) {
      source_interceptor_ids_.emplace_back(interceptor_id);
    }
  }
}

}  // namespace distributed
}  // namespace paddle