communicator.h 16.0 KB
Newer Older
Q
Qiao Longfei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

17
#include <ThreadPool.h>
18
#include <atomic>
Q
Qiao Longfei 已提交
19
#include <deque>
20
#include <map>
Q
Qiao Longfei 已提交
21 22
#include <memory>
#include <string>
Q
Qiao Longfei 已提交
23
#include <unordered_map>
24
#include <unordered_set>
Q
Qiao Longfei 已提交
25
#include <utility>
Q
Qiao Longfei 已提交
26
#include <vector>
27
#include "gflags/gflags.h"
Q
Qiao Longfei 已提交
28 29 30

#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
C
Chengmo 已提交
31 32
#include "paddle/fluid/operators/distributed/distributed.h"
#include "paddle/fluid/operators/distributed/rpc_client.h"
Q
Qiao Longfei 已提交
33
#include "paddle/fluid/operators/distributed/rpc_common.h"
C
Chengmo 已提交
34
#include "paddle/fluid/operators/distributed_ops/send_recv_util.h"
Q
Qiao Longfei 已提交
35 36
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
Q
Qiao Longfei 已提交
37 38 39 40
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"

41 42
// gflags declaration of the boolean flag `communicator_is_sgd_optimizer`.
// NOTE(review): the matching DEFINE_bool is not visible in this header; it
// presumably lives in the corresponding .cc file -- confirm.
DECLARE_bool(communicator_is_sgd_optimizer);

Q
Qiao Longfei 已提交
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
namespace paddle {
namespace operators {
namespace distributed {

// Short local aliases for the framework types used throughout this header.
using Scope = framework::Scope;
using Variable = framework::Variable;

/**
 * Bounded FIFO queue guarded by a mutex.
 *
 * Push() blocks while the queue already holds `capacity` elements and Pop()
 * blocks while it is empty.
 *
 * Producers and consumers wait on two separate condition variables. With a
 * single shared condition variable and notify_one(), the wake-up issued by a
 * Pop() could be delivered to another blocked consumer (and the one issued by
 * a Push() to another blocked producer); that waiter goes straight back to
 * sleep and the thread that could actually make progress is never woken.
 */
template <typename T>
class BlockingQueue {
 public:
  /// @param capacity maximum number of queued elements; enforced to be > 0.
  explicit BlockingQueue(size_t capacity) : capacity_(capacity) {
    PADDLE_ENFORCE_GT(capacity_, 0, "The capacity must be greater than 0.");
  }

  /// Appends a copy of `elem`, blocking while the queue is full.
  /// @return always true.
  bool Push(const T& elem) {
    {
      std::unique_lock<std::mutex> lock(mutex_);
      not_full_cv_.wait(lock, [this] { return queue_.size() < capacity_; });
      PADDLE_ENFORCE_LT(queue_.size(), capacity_);
      queue_.push_back(elem);
    }
    // Exactly one element became available: wake one consumer, after the
    // lock has been released so it does not immediately block on the mutex.
    not_empty_cv_.notify_one();
    return true;
  }

  /// Moves `elem` into the queue, blocking while the queue is full.
  /// @return always true.
  bool Push(T&& elem) {
    {
      std::unique_lock<std::mutex> lock(mutex_);
      not_full_cv_.wait(lock, [this] { return queue_.size() < capacity_; });
      PADDLE_ENFORCE_LT(queue_.size(), capacity_);
      queue_.emplace_back(std::move(elem));
    }
    not_empty_cv_.notify_one();
    return true;
  }

  /// Removes and returns the oldest element, blocking while the queue is
  /// empty.
  T Pop() {
    std::unique_lock<std::mutex> lock(mutex_);
    not_empty_cv_.wait(lock, [this] { return !queue_.empty(); });
    T rc(std::move(queue_.front()));
    queue_.pop_front();
    lock.unlock();
    // Exactly one slot became free: wake one producer.
    not_full_cv_.notify_one();
    return rc;
  }

  /// Capacity fixed at construction. `capacity_` is const, so no locking is
  /// required to read it.
  size_t Cap() const { return capacity_; }

  /// Number of elements queued at the moment of the call.
  size_t Size() const {
    std::lock_guard<std::mutex> lock(mutex_);
    return queue_.size();
  }

 private:
  const size_t capacity_;
  std::deque<T> queue_;

  mutable std::mutex mutex_;
  std::condition_variable not_full_cv_;   // producers wait here
  std::condition_variable not_empty_cv_;  // consumers wait here
};

Q
Qiao Longfei 已提交
106 107 108 109
// Alias template that forwards to framework::EigenVector, defaulting to a
// row-major layout and Eigen::DenseIndex as the index type.
template <typename T, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;

1
123malin 已提交
110
template <typename T>
Q
Qiao Longfei 已提交
111 112
inline void MergeVars(const std::string& var_name,
                      const std::vector<std::shared_ptr<Variable>>& vars,
1
123malin 已提交
113
                      Scope* scope, bool merge_add = true) {
Q
Qiao Longfei 已提交
114 115 116 117 118 119
  PADDLE_ENFORCE(!vars.empty(), "should have value to merge!");
  auto cpu_place = platform::CPUPlace();
  auto& var0 = vars[0];
  auto* out_var = scope->Var(var_name);
  if (var0->IsType<framework::LoDTensor>()) {
    auto dims = var0->Get<framework::LoDTensor>().dims();
1
123malin 已提交
120 121
    VLOG(3) << "merge " << var_name << " LoDTensor dims " << dims
            << "; merge add: " << merge_add;
Q
Qiao Longfei 已提交
122 123
    // init output tensor
    auto* out_t = out_var->GetMutable<framework::LoDTensor>();
1
123malin 已提交
124
    out_t->mutable_data<T>(dims, cpu_place);
Q
Qiao Longfei 已提交
125 126 127 128 129 130 131 132
    // check the input dims
    for (auto& var : vars) {
      auto& var_t = var->Get<framework::LoDTensor>();
      PADDLE_ENFORCE_EQ(var_t.dims(), dims, "should have the same dims");
    }

    // set output tensor to 0.
    auto cpu_ctx = paddle::platform::CPUDeviceContext();
1
123malin 已提交
133 134
    math::SetConstant<paddle::platform::CPUDeviceContext, T> constant_functor;
    constant_functor(cpu_ctx, out_t, static_cast<T>(0));
Q
Qiao Longfei 已提交
135
    // sum all vars to out
1
123malin 已提交
136
    auto result = EigenVector<T>::Flatten(*out_t);
Q
Qiao Longfei 已提交
137 138
    for (auto& var : vars) {
      auto& in_t = var->Get<framework::LoDTensor>();
1
123malin 已提交
139
      auto in = EigenVector<T>::Flatten(in_t);
Q
Qiao Longfei 已提交
140 141
      result.device(*cpu_ctx.eigen_device()) = result + in;
    }
1
123malin 已提交
142
    if (!merge_add) {
143
      result.device(*cpu_ctx.eigen_device()) =
1
123malin 已提交
144
          result / static_cast<T>(vars.size());
145
    }
Q
Qiao Longfei 已提交
146 147 148 149
  } else if (var0->IsType<framework::SelectedRows>()) {
    auto& slr0 = var0->Get<framework::SelectedRows>();
    auto* out_slr = out_var->GetMutable<framework::SelectedRows>();
    out_slr->mutable_rows()->clear();
1
123malin 已提交
150
    out_slr->mutable_value()->mutable_data<T>({{}}, cpu_place);
Q
Qiao Longfei 已提交
151 152 153 154 155 156
    std::vector<const paddle::framework::SelectedRows*> inputs;
    inputs.reserve(vars.size());
    for (auto& var : vars) {
      inputs.push_back(&var->Get<framework::SelectedRows>());
    }
    auto dev_ctx = paddle::platform::CPUDeviceContext();
1
123malin 已提交
157 158
    if (merge_add) {
      math::scatter::MergeAdd<paddle::platform::CPUDeviceContext, T> merge_add;
159 160
      merge_add(dev_ctx, inputs, out_slr);
    } else {
1
123malin 已提交
161
      math::scatter::MergeAverage<paddle::platform::CPUDeviceContext, T>
162 163 164 165
          merge_average;
      merge_average(dev_ctx, inputs, out_slr);
    }

Q
Qiao Longfei 已提交
166
    VLOG(3) << "merge " << var_name << " SelectedRows height: " << slr0.height()
1
123malin 已提交
167
            << " dims: " << slr0.value().dims() << "; merge add: " << merge_add;
Q
Qiao Longfei 已提交
168 169 170 171 172
  } else {
    PADDLE_THROW("unsupported var type!");
  }
}

Q
Qiao Longfei 已提交
173 174
// Hash map from a string key to an RpcContext. The members of this header
// that use it are named *_varname_to_ctx_, so the key is presumably a
// variable name -- confirm against the code that fills the map.
using RpcCtxMap = std::unordered_map<std::string, RpcContext>;

Q
Qiao Longfei 已提交
175 176
class Communicator {
 public:
1
123malin 已提交
177
  Communicator();
178
  explicit Communicator(const std::map<std::string, std::string>& envs);
T
tangwei12 已提交
179
  virtual ~Communicator() {}
Q
Qiao Longfei 已提交
180

T
tangwei12 已提交
181 182 183
  virtual void Start() = 0;
  virtual void Stop() = 0;
  virtual bool IsRunning() { return running_; }
Q
Qiao Longfei 已提交
184

185 186
  virtual void Send(const std::vector<std::string>& var_names,
                    const std::vector<std::string>& var_tables,
187 188
                    const framework::Scope& scope) = 0;

T
tangwei12 已提交
189
  virtual void Recv() = 0;
Q
Qiao Longfei 已提交
190

191 192 193 194
  virtual void Barrier() {}
  virtual void BarrierTriggerDecrement() {}
  virtual void BarrierTriggerReset(int init_counter) {}

T
tangwei12 已提交
195 196
  virtual void InitImpl(const RpcCtxMap& send_varname_to_ctx,
                        const RpcCtxMap& recv_varname_to_ctx,
197
                        Scope* recv_scope) {}
T
tangwei12 已提交
198 199
  virtual void InitImpl(const paddle::framework::ProgramDesc& program,
                        Scope* recv_scope) = 0;
Q
Qiao Longfei 已提交
200

T
tangwei12 已提交
201 202 203 204
  static Communicator* GetInstance() { return communicator_.get(); }
  static std::shared_ptr<Communicator> GetInstantcePtr() {
    return communicator_;
  }
205 206
  template <typename T>
  static Communicator* InitInstance(
1
123malin 已提交
207
      const paddle::framework::ProgramDesc& program, Scope* recv_scope,
208
      const std::map<std::string, std::string>& envs) {
209
    std::call_once(init_flag_, &Communicator::InitWithProgram<T>, program,
210
                   recv_scope, std::ref(envs));
211 212 213
    return communicator_.get();
  }

T
tangwei12 已提交
214 215
  template <typename T>
  static void InitWithProgram(const paddle::framework::ProgramDesc& program,
1
123malin 已提交
216
                              Scope* recv_scope,
217
                              const std::map<std::string, std::string>& envs) {
T
tangwei12 已提交
218
    if (communicator_.get() == nullptr) {
219
      communicator_.reset(new T(std::ref(envs)));
T
tangwei12 已提交
220 221 222 223 224 225 226 227
      communicator_->InitImpl(program, recv_scope);
    }
  }

 protected:
  bool running_ = false;
  static std::shared_ptr<Communicator> communicator_;
  static std::once_flag init_flag_;
228
  std::unordered_map<std::string, std::string> envs;
T
tangwei12 已提交
229 230
};

231
// Hash map from a string key to a vector of int64 id sets.
// NOTE(review): the key is presumably a variable name and the vector index a
// split/section index -- confirm against the code that fills the map.
using SparseIdsMap =
    std::unordered_map<std::string, std::vector<std::unordered_set<int64_t>>>;
233

T
tangwei12 已提交
234 235
class AsyncCommunicator : public Communicator {
 public:
1
123malin 已提交
236
  AsyncCommunicator() : Communicator() {}
237 238 239 240 241 242 243 244 245 246 247 248 249
  explicit AsyncCommunicator(const std::map<std::string, std::string>& envs)
      : Communicator(envs) {
    independent_recv_thread_ = static_cast<bool>(
        std::stoi(envs.at("communicator_independent_recv_thread")));
    min_send_grad_num_before_recv_ =
        std::stoi(envs.at("communicator_min_send_grad_num_before_recv"));
    thread_pool_size_ = std::stoi(envs.at("communicator_thread_pool_size"));
    max_merge_var_num_ = std::stoi(envs.at("communicator_max_merge_var_num"));
    send_wait_times_ = std::stoi(envs.at("communicator_send_wait_times"));
    send_queue_size_ = std::stoi(envs.at("communicator_send_queue_size"));
    is_sgd_optimizer_ =
        static_cast<bool>(std::stoi(envs.at("communicator_is_sgd_optimizer")));
  }
T
tangwei12 已提交
250 251 252 253 254
  ~AsyncCommunicator();
  void Start() override;
  void Stop() override;

  void Recv() override;
Q
Qiao Longfei 已提交
255
  void RecvAll();
T
tangwei12 已提交
256 257 258 259 260 261 262 263

  void InitImpl(const RpcCtxMap& send_varname_to_ctx,
                const RpcCtxMap& recv_varname_to_ctx,
                Scope* recv_scope) override;

  void InitImpl(const paddle::framework::ProgramDesc& program,
                Scope* recv_scope) override;

Q
Qiao Longfei 已提交
264 265 266
  void SendThread();
  void RecvThread();

267 268
  void Send(const std::vector<std::string>& var_names,
            const std::vector<std::string>& var_tables,
269 270
            const framework::Scope& scope) override;

271 272 273 274 275 276 277 278
 private:
  int min_send_grad_num_before_recv_;
  int thread_pool_size_;
  int max_merge_var_num_;
  int send_wait_times_;
  int send_queue_size_;
  bool independent_recv_thread_;
  bool is_sgd_optimizer_;
279

T
tangwei12 已提交
280
 private:
Q
Qiao Longfei 已提交
281 282 283
  std::unordered_map<std::string,
                     std::shared_ptr<BlockingQueue<std::shared_ptr<Variable>>>>
      send_varname_to_queue_;
Q
Qiao Longfei 已提交
284 285
  RpcCtxMap send_varname_to_ctx_;
  RpcCtxMap recv_varname_to_ctx_;
286 287
  std::unique_ptr<std::thread> send_thread_{nullptr};
  std::unique_ptr<std::thread> recv_thread_{nullptr};
Q
Qiao Longfei 已提交
288 289
  Scope* recv_scope_;                  // should be global scope
  std::unique_ptr<Scope> send_scope_;  // an independent scope
Q
Qiao Longfei 已提交
290 291
  std::unique_ptr<::ThreadPool> send_threadpool_{nullptr};
  std::unique_ptr<::ThreadPool> recv_threadpool_{nullptr};
292
  std::atomic_uint grad_num_{0};  // the num of gradient sent since last recv
Q
Qiao Longfei 已提交
293 294
};

295
class HalfAsyncCommunicator : public Communicator {
296
 public:
297 298 299 300 301 302 303 304 305
  HalfAsyncCommunicator() {}
  explicit HalfAsyncCommunicator(const std::map<std::string, std::string>& envs)
      : Communicator(envs) {
    max_merge_var_num_ = std::stoi(envs.at("communicator_max_merge_var_num"));
    send_wait_times_ = std::stoi(envs.at("communicator_send_wait_times"));
    thread_pool_size_ = std::stoi(envs.at("communicator_thread_pool_size"));
    send_queue_size_ = std::stoi(envs.at("communicator_send_queue_size"));
  }
  ~HalfAsyncCommunicator();
306 307 308
  void Start() override;
  void Stop() override;

309 310
  void Send(const std::vector<std::string>& var_names,
            const std::vector<std::string>& var_tables,
311 312 313 314
            const framework::Scope& scope) override;

  void Recv() override;

315 316 317 318 319 320
  void Barrier() override;
  void BarrierWeakUp();

  void BarrierTriggerDecrement() override;
  void BarrierTriggerReset(int initial_val) override;

321 322 323 324 325 326 327
  void InitImpl(const RpcCtxMap& send_varname_to_ctx,
                const RpcCtxMap& recv_varname_to_ctx,
                Scope* recv_scope) override;

  void InitImpl(const paddle::framework::ProgramDesc& program,
                Scope* recv_scope) override;

328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
  void ConsumeThread();

 private:
  int max_merge_var_num_;
  int send_wait_times_;
  int thread_pool_size_;
  int send_queue_size_;

 private:
  std::unordered_map<std::string,
                     std::shared_ptr<BlockingQueue<std::shared_ptr<Variable>>>>
      send_varname_to_queue_;
  RpcCtxMap send_varname_to_ctx_;
  RpcCtxMap recv_varname_to_ctx_;
  std::unique_ptr<std::thread> consume_thread_{nullptr};
  Scope* recv_scope_;                  // should be global scope
  std::unique_ptr<Scope> send_scope_;  // an independent scope
  std::unique_ptr<::ThreadPool> consume_threadpool_{nullptr};
  std::unique_ptr<::ThreadPool> recv_threadpool_{nullptr};

  // mutex for Wait for barrier
  std::mutex barrier_mutex_;
  std::condition_variable barrier_cond_;
  std::atomic<int64_t> barrier_trigger_{0};
  std::atomic<int64_t> barrier_counter_{0};
};

class GeoSgdCommunicator : public Communicator {
 public:
  GeoSgdCommunicator() : Communicator() {}
  explicit GeoSgdCommunicator(const std::map<std::string, std::string>& envs)
      : Communicator(envs) {
    geo_need_push_nums_ = std::stoi(envs.at("geo_need_push_nums"));
    trainer_nums_ = std::stoi(envs.at("geo_trainer_nums"));
    thread_pool_size_ = std::stoi(envs.at("communicator_thread_pool_size"));
    send_wait_times_ = std::stoi(envs.at("communicator_send_wait_times"));
  }

  ~GeoSgdCommunicator();

  void Start() override;
  void Stop() override;

  void Send(const std::vector<std::string>& var_names,
            const std::vector<std::string>& var_tables,
            const framework::Scope& scope) override;

  void Recv() override;

  void InitImpl(const paddle::framework::ProgramDesc& program,
                Scope* recv_scope) override;

380 381 382 383
 private:
  void SendThread();
  std::unordered_set<int64_t> SparseIdsMerge(
      const std::vector<SparseIdsMap>& ids_send_vec,
C
Chengmo 已提交
384
      const std::string& var_name, const std::string& splited_var_name);
385

386 387
  void SendUpdateDenseVars(const std::string& var_name,
                           const std::string& splited_var_name);
388

389
  void SendUpdateSparseVars(const std::string& var_name,
C
Chengmo 已提交
390
                            const std::string& splited_var_name,
391
                            const std::unordered_set<int64_t>& ids_table);
C
Chengmo 已提交
392

393 394
  void RecvUpdateDenseVars(const std::string& var_name,
                           const std::string& splited_var_name);
C
Chengmo 已提交
395 396
  void RecvUpdateSparseVars(const std::string& var_name,
                            const std::string& splited_var_name);
397 398 399 400 401 402 403 404 405

  void GeoSgdDenseParamInit(framework::Scope* scope_x,
                            framework::Scope* scope_y,
                            const std::string var_name);

  void GeoSgdSparseParamInit(framework::Scope* scope_x,
                             framework::Scope* scope_y,
                             const std::string var_name);

C
Chengmo 已提交
406 407 408 409 410 411 412 413
  void RpcSend(const std::string& origin_var_name,
               const std::string& splited_var_name,
               const size_t& splited_var_index);

  void RpcRecv(const std::string& origin_var_name,
               const std::string& splited_var_name,
               const size_t& splited_var_index);

414 415 416 417 418 419 420 421 422 423 424 425 426
  const std::string VarToDeltaVar(const std::string var_name) {
    std::string delta_name = var_name;
    const std::string send_name = delta_name.append(".delta");
    return send_name;
  }

  const std::string DeltaVarToVar(const std::string var_name) {
    std::string origin_name = var_name;
    origin_name.erase(origin_name.find(".delta"), 6);
    const std::string param_name = origin_name;
    return param_name;
  }

C
Chengmo 已提交
427 428 429 430 431 432 433 434 435 436 437 438 439 440
  size_t GetSplitedVarIndex(const std::string var_name,
                            const std::string splited_var_name) {
    size_t index = 0;
    for (size_t i = 0;
         i < send_varname_to_ctx_[var_name].splited_var_names.size(); i++) {
      if (send_varname_to_ctx_[var_name].splited_var_names[i] ==
          splited_var_name) {
        index = i;
        break;
      }
    }
    return index;
  }

441 442
 private:
  int trainer_nums_ = 1;
443 444 445 446 447
  int geo_need_push_nums_ = 100;
  int thread_pool_size_;
  int send_wait_times_;

 private:
448 449
  int send_var_nums_ = 0;

450 451
  RpcCtxMap send_varname_to_ctx_;
  RpcCtxMap recv_varname_to_ctx_;
452 453 454 455 456 457 458 459 460 461 462 463 464 465 466

  // parameter for local training
  Scope* training_scope_;

  // parameter for delta calc and send
  std::shared_ptr<Scope> delta_scope_;

  // parameter for storage the pserver param after last recv
  std::shared_ptr<Scope> old_scope_;

  // parameter on pserver
  std::shared_ptr<Scope> pserver_scope_;

  // if var is sparse, using selected rows, bool=true
  std::unordered_map<std::string, bool> var_list_;
467 468 469 470 471

  std::shared_ptr<BlockingQueue<std::shared_ptr<SparseIdsMap>>>
      need_push_queue_;
  std::vector<SparseIdsMap> ids_send_vec_;

C
Chengmo 已提交
472
  std::unordered_map<std::string, std::vector<int64_t>> absolute_section_;
473
  std::unordered_map<std::string, int64_t> vars_first_dimension_;
C
Chengmo 已提交
474

475 476
  std::unique_ptr<::ThreadPool> send_threadpool_{nullptr};
  std::unique_ptr<std::thread> send_thread_{nullptr};
C
Chengmo 已提交
477 478

  size_t need_thread_nums_{0};
479 480
};

Q
Qiao Longfei 已提交
481 482 483
}  // namespace distributed
}  // namespace operators
}  // namespace paddle