communicator.h 16.9 KB
Newer Older
Q
Qiao Longfei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

17
#include <ThreadPool.h>
18
#include <atomic>
Q
Qiao Longfei 已提交
19
#include <deque>
20
#include <map>
Q
Qiao Longfei 已提交
21 22
#include <memory>
#include <string>
Q
Qiao Longfei 已提交
23
#include <unordered_map>
24
#include <unordered_set>
Q
Qiao Longfei 已提交
25
#include <utility>
Q
Qiao Longfei 已提交
26
#include <vector>
27
#include "gflags/gflags.h"
Q
Qiao Longfei 已提交
28 29 30

#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
C
Chengmo 已提交
31 32
#include "paddle/fluid/operators/distributed/distributed.h"
#include "paddle/fluid/operators/distributed/rpc_client.h"
Q
Qiao Longfei 已提交
33
#include "paddle/fluid/operators/distributed/rpc_common.h"
C
Chengmo 已提交
34
#include "paddle/fluid/operators/distributed_ops/send_recv_util.h"
Q
Qiao Longfei 已提交
35 36
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
Q
Qiao Longfei 已提交
37 38 39
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
T
tangwei12 已提交
40
#include "paddle/fluid/string/split.h"
Q
Qiao Longfei 已提交
41

42 43
DECLARE_bool(communicator_is_sgd_optimizer);

Q
Qiao Longfei 已提交
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
namespace paddle {
namespace operators {
namespace distributed {

// Shorthand aliases for the framework types used throughout this header.
using Scope = framework::Scope;
using Variable = framework::Variable;

/**
 * Bounded FIFO queue whose Push() blocks while the queue is full and whose
 * Pop() blocks while it is empty. All operations are serialized by one mutex.
 *
 * Producers and consumers wait on separate condition variables. With a single
 * shared condition variable and notify_one(), a Pop() could wake another
 * blocked Pop() (or a Push() another blocked Push()), leaving the thread that
 * could actually make progress asleep.
 */
template <typename T>
class BlockingQueue {
 public:
  /// @param capacity maximum number of queued elements; enforced to be > 0.
  explicit BlockingQueue(size_t capacity) : capacity_(capacity) {
    PADDLE_ENFORCE_GT(capacity_, 0, "The capacity must be greater than 0.");
  }

  /// Copies `elem` to the back of the queue, blocking while the queue is full.
  /// @return always true.
  bool Push(const T& elem) {
    {
      std::unique_lock<std::mutex> lock(mutex_);
      not_full_cv_.wait(lock, [this] { return queue_.size() < capacity_; });
      PADDLE_ENFORCE_LT(queue_.size(), capacity_);
      queue_.push_back(elem);
    }
    // Notify after releasing the lock so the woken consumer can take it at once.
    not_empty_cv_.notify_one();
    return true;
  }

  /// Moves `elem` to the back of the queue, blocking while the queue is full.
  /// @return always true.
  bool Push(T&& elem) {
    {
      std::unique_lock<std::mutex> lock(mutex_);
      not_full_cv_.wait(lock, [this] { return queue_.size() < capacity_; });
      PADDLE_ENFORCE_LT(queue_.size(), capacity_);
      queue_.emplace_back(std::move(elem));
    }
    not_empty_cv_.notify_one();
    return true;
  }

  /// Removes and returns the front element, blocking while the queue is empty.
  T Pop() {
    std::unique_lock<std::mutex> lock(mutex_);
    not_empty_cv_.wait(lock, [this] { return !queue_.empty(); });
    T rc(std::move(queue_.front()));
    queue_.pop_front();
    lock.unlock();
    // One slot was freed, so exactly one blocked producer can proceed.
    not_full_cv_.notify_one();
    return rc;
  }

  /// Maximum number of elements the queue can hold.
  /// capacity_ is immutable after construction, so no locking is required.
  size_t Cap() const { return capacity_; }

  /// Number of elements currently queued (a snapshot taken under the lock).
  size_t Size() const {
    std::lock_guard<std::mutex> lock(mutex_);
    return queue_.size();
  }

 private:
  const size_t capacity_;
  std::deque<T> queue_;

  mutable std::mutex mutex_;
  std::condition_variable not_full_cv_;   // signalled when a slot is freed
  std::condition_variable not_empty_cv_;  // signalled when an element arrives
};

Q
Qiao Longfei 已提交
107 108 109 110
// Local shorthand for framework::EigenVector with the same template
// parameters; defaults to Eigen::RowMajor layout and Eigen::DenseIndex.
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;

1
123malin 已提交
111
template <typename T>
Q
Qiao Longfei 已提交
112 113
inline void MergeVars(const std::string& var_name,
                      const std::vector<std::shared_ptr<Variable>>& vars,
1
123malin 已提交
114
                      Scope* scope, bool merge_add = true) {
Q
Qiao Longfei 已提交
115 116 117 118 119 120
  PADDLE_ENFORCE(!vars.empty(), "should have value to merge!");
  auto cpu_place = platform::CPUPlace();
  auto& var0 = vars[0];
  auto* out_var = scope->Var(var_name);
  if (var0->IsType<framework::LoDTensor>()) {
    auto dims = var0->Get<framework::LoDTensor>().dims();
1
123malin 已提交
121 122
    VLOG(3) << "merge " << var_name << " LoDTensor dims " << dims
            << "; merge add: " << merge_add;
Q
Qiao Longfei 已提交
123 124
    // init output tensor
    auto* out_t = out_var->GetMutable<framework::LoDTensor>();
1
123malin 已提交
125
    out_t->mutable_data<T>(dims, cpu_place);
Q
Qiao Longfei 已提交
126 127 128 129 130 131 132 133
    // check the input dims
    for (auto& var : vars) {
      auto& var_t = var->Get<framework::LoDTensor>();
      PADDLE_ENFORCE_EQ(var_t.dims(), dims, "should have the same dims");
    }

    // set output tensor to 0.
    auto cpu_ctx = paddle::platform::CPUDeviceContext();
1
123malin 已提交
134 135
    math::SetConstant<paddle::platform::CPUDeviceContext, T> constant_functor;
    constant_functor(cpu_ctx, out_t, static_cast<T>(0));
Q
Qiao Longfei 已提交
136
    // sum all vars to out
1
123malin 已提交
137
    auto result = EigenVector<T>::Flatten(*out_t);
Q
Qiao Longfei 已提交
138 139
    for (auto& var : vars) {
      auto& in_t = var->Get<framework::LoDTensor>();
1
123malin 已提交
140
      auto in = EigenVector<T>::Flatten(in_t);
Q
Qiao Longfei 已提交
141 142
      result.device(*cpu_ctx.eigen_device()) = result + in;
    }
1
123malin 已提交
143
    if (!merge_add) {
144
      result.device(*cpu_ctx.eigen_device()) =
1
123malin 已提交
145
          result / static_cast<T>(vars.size());
146
    }
Q
Qiao Longfei 已提交
147 148 149 150
  } else if (var0->IsType<framework::SelectedRows>()) {
    auto& slr0 = var0->Get<framework::SelectedRows>();
    auto* out_slr = out_var->GetMutable<framework::SelectedRows>();
    out_slr->mutable_rows()->clear();
1
123malin 已提交
151
    out_slr->mutable_value()->mutable_data<T>({{}}, cpu_place);
Q
Qiao Longfei 已提交
152 153 154 155 156 157
    std::vector<const paddle::framework::SelectedRows*> inputs;
    inputs.reserve(vars.size());
    for (auto& var : vars) {
      inputs.push_back(&var->Get<framework::SelectedRows>());
    }
    auto dev_ctx = paddle::platform::CPUDeviceContext();
1
123malin 已提交
158 159
    if (merge_add) {
      math::scatter::MergeAdd<paddle::platform::CPUDeviceContext, T> merge_add;
160 161
      merge_add(dev_ctx, inputs, out_slr);
    } else {
1
123malin 已提交
162
      math::scatter::MergeAverage<paddle::platform::CPUDeviceContext, T>
163 164 165 166
          merge_average;
      merge_average(dev_ctx, inputs, out_slr);
    }

Q
Qiao Longfei 已提交
167
    VLOG(3) << "merge " << var_name << " SelectedRows height: " << slr0.height()
1
123malin 已提交
168
            << " dims: " << slr0.value().dims() << "; merge add: " << merge_add;
Q
Qiao Longfei 已提交
169 170 171 172 173
  } else {
    PADDLE_THROW("unsupported var type!");
  }
}

Q
Qiao Longfei 已提交
174 175
// Lookup table from a string key to its RpcContext. The members declared
// below name their instances send_varname_to_ctx_ / recv_varname_to_ctx_, so
// the key is presumably a variable name.
using RpcCtxMap = std::unordered_map<std::string, RpcContext>;

Q
Qiao Longfei 已提交
176 177
class Communicator {
 public:
1
123malin 已提交
178
  Communicator();
179
  explicit Communicator(const std::map<std::string, std::string>& envs);
T
tangwei12 已提交
180
  virtual ~Communicator() {}
Q
Qiao Longfei 已提交
181

T
tangwei12 已提交
182 183 184
  virtual void Start() = 0;
  virtual void Stop() = 0;
  virtual bool IsRunning() { return running_; }
Q
Qiao Longfei 已提交
185

186 187
  virtual void Clean() {}

188 189
  virtual void Send(const std::vector<std::string>& var_names,
                    const std::vector<std::string>& var_tables,
190 191
                    const framework::Scope& scope) = 0;

T
tangwei12 已提交
192
  virtual void Recv() = 0;
Q
Qiao Longfei 已提交
193

194 195 196 197
  virtual void Barrier() {}
  virtual void BarrierTriggerDecrement() {}
  virtual void BarrierTriggerReset(int init_counter) {}

T
tangwei12 已提交
198 199
  virtual void InitImpl(const RpcCtxMap& send_varname_to_ctx,
                        const RpcCtxMap& recv_varname_to_ctx,
200
                        Scope* recv_scope) {}
T
tangwei12 已提交
201 202
  virtual void InitImpl(const paddle::framework::ProgramDesc& program,
                        Scope* recv_scope) = 0;
Q
Qiao Longfei 已提交
203

T
tangwei12 已提交
204 205 206 207
  static Communicator* GetInstance() { return communicator_.get(); }
  static std::shared_ptr<Communicator> GetInstantcePtr() {
    return communicator_;
  }
208 209
  template <typename T>
  static Communicator* InitInstance(
1
123malin 已提交
210
      const paddle::framework::ProgramDesc& program, Scope* recv_scope,
211
      const std::map<std::string, std::string>& envs) {
212
    std::call_once(init_flag_, &Communicator::InitWithProgram<T>, program,
213
                   recv_scope, std::ref(envs));
214 215 216
    return communicator_.get();
  }

T
tangwei12 已提交
217 218
  template <typename T>
  static void InitWithProgram(const paddle::framework::ProgramDesc& program,
1
123malin 已提交
219
                              Scope* recv_scope,
220
                              const std::map<std::string, std::string>& envs) {
T
tangwei12 已提交
221
    if (communicator_.get() == nullptr) {
222
      communicator_.reset(new T(std::ref(envs)));
T
tangwei12 已提交
223 224 225 226 227 228 229 230
      communicator_->InitImpl(program, recv_scope);
    }
  }

 protected:
  bool running_ = false;
  static std::shared_ptr<Communicator> communicator_;
  static std::once_flag init_flag_;
231
  std::unordered_map<std::string, std::string> envs;
T
tangwei12 已提交
232 233
};

234
// Maps a string key to a list of id sets. Presumably the key is a variable
// name and each vector slot holds the sparse ids of one split of that
// variable -- confirm against the implementation file.
using SparseIdsMap =
    std::unordered_map<std::string, std::vector<std::unordered_set<int64_t>>>;
236

T
tangwei12 已提交
237 238
class AsyncCommunicator : public Communicator {
 public:
1
123malin 已提交
239
  AsyncCommunicator() : Communicator() {}
240 241 242 243 244 245 246 247 248 249 250 251
  explicit AsyncCommunicator(const std::map<std::string, std::string>& envs)
      : Communicator(envs) {
    independent_recv_thread_ = static_cast<bool>(
        std::stoi(envs.at("communicator_independent_recv_thread")));
    min_send_grad_num_before_recv_ =
        std::stoi(envs.at("communicator_min_send_grad_num_before_recv"));
    thread_pool_size_ = std::stoi(envs.at("communicator_thread_pool_size"));
    max_merge_var_num_ = std::stoi(envs.at("communicator_max_merge_var_num"));
    send_wait_times_ = std::stoi(envs.at("communicator_send_wait_times"));
    send_queue_size_ = std::stoi(envs.at("communicator_send_queue_size"));
    is_sgd_optimizer_ =
        static_cast<bool>(std::stoi(envs.at("communicator_is_sgd_optimizer")));
T
tangwei12 已提交
252
    VLOG(0) << "AsyncCommunicator Initialized";
253
  }
T
tangwei12 已提交
254 255 256 257 258
  ~AsyncCommunicator();
  void Start() override;
  void Stop() override;

  void Recv() override;
Q
Qiao Longfei 已提交
259
  void RecvAll();
T
tangwei12 已提交
260 261 262 263 264 265 266 267

  void InitImpl(const RpcCtxMap& send_varname_to_ctx,
                const RpcCtxMap& recv_varname_to_ctx,
                Scope* recv_scope) override;

  void InitImpl(const paddle::framework::ProgramDesc& program,
                Scope* recv_scope) override;

Q
Qiao Longfei 已提交
268 269 270
  void SendThread();
  void RecvThread();

271 272
  void Send(const std::vector<std::string>& var_names,
            const std::vector<std::string>& var_tables,
273 274
            const framework::Scope& scope) override;

275 276 277 278 279 280 281 282
 private:
  int min_send_grad_num_before_recv_;
  int thread_pool_size_;
  int max_merge_var_num_;
  int send_wait_times_;
  int send_queue_size_;
  bool independent_recv_thread_;
  bool is_sgd_optimizer_;
283

T
tangwei12 已提交
284
 private:
Q
Qiao Longfei 已提交
285 286 287
  std::unordered_map<std::string,
                     std::shared_ptr<BlockingQueue<std::shared_ptr<Variable>>>>
      send_varname_to_queue_;
Q
Qiao Longfei 已提交
288 289
  RpcCtxMap send_varname_to_ctx_;
  RpcCtxMap recv_varname_to_ctx_;
290 291
  std::unique_ptr<std::thread> send_thread_{nullptr};
  std::unique_ptr<std::thread> recv_thread_{nullptr};
Q
Qiao Longfei 已提交
292 293
  Scope* recv_scope_;                  // should be global scope
  std::unique_ptr<Scope> send_scope_;  // an independent scope
Q
Qiao Longfei 已提交
294 295
  std::unique_ptr<::ThreadPool> send_threadpool_{nullptr};
  std::unique_ptr<::ThreadPool> recv_threadpool_{nullptr};
296
  std::atomic_uint grad_num_{0};  // the num of gradient sent since last recv
Q
Qiao Longfei 已提交
297 298
};

299
class HalfAsyncCommunicator : public Communicator {
300
 public:
301 302 303 304 305 306 307
  HalfAsyncCommunicator() {}
  explicit HalfAsyncCommunicator(const std::map<std::string, std::string>& envs)
      : Communicator(envs) {
    max_merge_var_num_ = std::stoi(envs.at("communicator_max_merge_var_num"));
    send_wait_times_ = std::stoi(envs.at("communicator_send_wait_times"));
    thread_pool_size_ = std::stoi(envs.at("communicator_thread_pool_size"));
    send_queue_size_ = std::stoi(envs.at("communicator_send_queue_size"));
T
tangwei12 已提交
308
    VLOG(0) << "HalfAsyncCommunicator Initialized";
309 310
  }
  ~HalfAsyncCommunicator();
311 312 313
  void Start() override;
  void Stop() override;

314 315
  void Clean() override;

316 317
  void Send(const std::vector<std::string>& var_names,
            const std::vector<std::string>& var_tables,
318 319 320 321
            const framework::Scope& scope) override;

  void Recv() override;

322 323 324 325 326 327
  void Barrier() override;
  void BarrierWeakUp();

  void BarrierTriggerDecrement() override;
  void BarrierTriggerReset(int initial_val) override;

328 329 330 331 332 333 334
  void InitImpl(const RpcCtxMap& send_varname_to_ctx,
                const RpcCtxMap& recv_varname_to_ctx,
                Scope* recv_scope) override;

  void InitImpl(const paddle::framework::ProgramDesc& program,
                Scope* recv_scope) override;

335
  void ConsumeThread();
T
tangwei12 已提交
336 337
  virtual void BarrierSend() {}
  virtual void BarrierRecv() {}
338

T
tangwei12 已提交
339
 protected:
340 341 342 343
  int max_merge_var_num_;
  int send_wait_times_;
  int thread_pool_size_;
  int send_queue_size_;
T
tangwei12 已提交
344
  int trainer_id_ = 0;
345

T
tangwei12 已提交
346
 protected:
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364
  std::unordered_map<std::string,
                     std::shared_ptr<BlockingQueue<std::shared_ptr<Variable>>>>
      send_varname_to_queue_;
  RpcCtxMap send_varname_to_ctx_;
  RpcCtxMap recv_varname_to_ctx_;
  std::unique_ptr<std::thread> consume_thread_{nullptr};
  Scope* recv_scope_;                  // should be global scope
  std::unique_ptr<Scope> send_scope_;  // an independent scope
  std::unique_ptr<::ThreadPool> consume_threadpool_{nullptr};
  std::unique_ptr<::ThreadPool> recv_threadpool_{nullptr};

  // mutex for Wait for barrier
  std::mutex barrier_mutex_;
  std::condition_variable barrier_cond_;
  std::atomic<int64_t> barrier_trigger_{0};
  std::atomic<int64_t> barrier_counter_{0};
};

T
tangwei12 已提交
365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
class SyncCommunicator : public HalfAsyncCommunicator {
 public:
  SyncCommunicator() : HalfAsyncCommunicator() {}
  explicit SyncCommunicator(const std::map<std::string, std::string>& envs)
      : HalfAsyncCommunicator(envs) {
    trainer_id_ = std::stoi(envs.at("trainer_id"));
    auto pserver_strings = envs.at("pserver_endpoints");
    pserver_endpoints_ = paddle::string::Split(pserver_strings, ',');
    VLOG(0) << "SyncCommunicator Initialized";
  }
  ~SyncCommunicator();
  void BarrierSend();
  void BarrierRecv();

 private:
  std::vector<std::string> pserver_endpoints_{};
};

383 384 385 386 387 388 389 390 391
class GeoSgdCommunicator : public Communicator {
 public:
  GeoSgdCommunicator() : Communicator() {}
  explicit GeoSgdCommunicator(const std::map<std::string, std::string>& envs)
      : Communicator(envs) {
    geo_need_push_nums_ = std::stoi(envs.at("geo_need_push_nums"));
    trainer_nums_ = std::stoi(envs.at("geo_trainer_nums"));
    thread_pool_size_ = std::stoi(envs.at("communicator_thread_pool_size"));
    send_wait_times_ = std::stoi(envs.at("communicator_send_wait_times"));
T
tangwei12 已提交
392
    VLOG(0) << "GeoSgdCommunicator Initialized";
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
  }

  ~GeoSgdCommunicator();

  void Start() override;
  void Stop() override;

  void Send(const std::vector<std::string>& var_names,
            const std::vector<std::string>& var_tables,
            const framework::Scope& scope) override;

  void Recv() override;

  void InitImpl(const paddle::framework::ProgramDesc& program,
                Scope* recv_scope) override;

409 410 411 412
 private:
  void SendThread();
  std::unordered_set<int64_t> SparseIdsMerge(
      const std::vector<SparseIdsMap>& ids_send_vec,
C
Chengmo 已提交
413
      const std::string& var_name, const std::string& splited_var_name);
414

415 416
  void SendUpdateDenseVars(const std::string& var_name,
                           const std::string& splited_var_name);
417

418
  void SendUpdateSparseVars(const std::string& var_name,
C
Chengmo 已提交
419
                            const std::string& splited_var_name,
420
                            const std::unordered_set<int64_t>& ids_table);
C
Chengmo 已提交
421

422 423
  void RecvUpdateDenseVars(const std::string& var_name,
                           const std::string& splited_var_name);
C
Chengmo 已提交
424 425
  void RecvUpdateSparseVars(const std::string& var_name,
                            const std::string& splited_var_name);
426 427 428 429 430 431 432 433 434

  void GeoSgdDenseParamInit(framework::Scope* scope_x,
                            framework::Scope* scope_y,
                            const std::string var_name);

  void GeoSgdSparseParamInit(framework::Scope* scope_x,
                             framework::Scope* scope_y,
                             const std::string var_name);

C
Chengmo 已提交
435 436 437 438 439 440 441 442
  void RpcSend(const std::string& origin_var_name,
               const std::string& splited_var_name,
               const size_t& splited_var_index);

  void RpcRecv(const std::string& origin_var_name,
               const std::string& splited_var_name,
               const size_t& splited_var_index);

443 444 445 446 447 448 449 450 451 452 453 454 455
  const std::string VarToDeltaVar(const std::string var_name) {
    std::string delta_name = var_name;
    const std::string send_name = delta_name.append(".delta");
    return send_name;
  }

  const std::string DeltaVarToVar(const std::string var_name) {
    std::string origin_name = var_name;
    origin_name.erase(origin_name.find(".delta"), 6);
    const std::string param_name = origin_name;
    return param_name;
  }

C
Chengmo 已提交
456 457 458 459 460 461 462 463 464 465 466 467 468 469
  size_t GetSplitedVarIndex(const std::string var_name,
                            const std::string splited_var_name) {
    size_t index = 0;
    for (size_t i = 0;
         i < send_varname_to_ctx_[var_name].splited_var_names.size(); i++) {
      if (send_varname_to_ctx_[var_name].splited_var_names[i] ==
          splited_var_name) {
        index = i;
        break;
      }
    }
    return index;
  }

470 471
 private:
  int trainer_nums_ = 1;
472 473 474 475 476
  int geo_need_push_nums_ = 100;
  int thread_pool_size_;
  int send_wait_times_;

 private:
477 478
  int send_var_nums_ = 0;

479 480
  RpcCtxMap send_varname_to_ctx_;
  RpcCtxMap recv_varname_to_ctx_;
481 482 483 484 485 486 487 488 489 490 491 492 493 494 495

  // parameter for local training
  Scope* training_scope_;

  // parameter for delta calc and send
  std::shared_ptr<Scope> delta_scope_;

  // parameter for storage the pserver param after last recv
  std::shared_ptr<Scope> old_scope_;

  // parameter on pserver
  std::shared_ptr<Scope> pserver_scope_;

  // if var is sparse, using selected rows, bool=true
  std::unordered_map<std::string, bool> var_list_;
496 497 498 499 500

  std::shared_ptr<BlockingQueue<std::shared_ptr<SparseIdsMap>>>
      need_push_queue_;
  std::vector<SparseIdsMap> ids_send_vec_;

C
Chengmo 已提交
501
  std::unordered_map<std::string, std::vector<int64_t>> absolute_section_;
502
  std::unordered_map<std::string, int64_t> vars_first_dimension_;
C
Chengmo 已提交
503

504 505
  std::unique_ptr<::ThreadPool> send_threadpool_{nullptr};
  std::unique_ptr<std::thread> send_thread_{nullptr};
C
Chengmo 已提交
506 507

  size_t need_thread_nums_{0};
508 509
};

Q
Qiao Longfei 已提交
510 511 512
}  // namespace distributed
}  // namespace operators
}  // namespace paddle