// nccl_wrapper.h
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <atomic>
#include <ctime>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable_helper.h"
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/dynload/nccl.h"
#endif
#include "paddle/fluid/platform/macros.h"  // for DISABLE_COPY_AND_ASSIGN

namespace paddle {
namespace framework {

// Value-type bundle of rank bookkeeping and, on CUDA builds for non-Windows
// targets, the NCCL/CUDA handles that go with it.
//
// Every member carries a default initializer so that a freshly constructed
// (or copied-before-assignment) NCCLInfo never exposes indeterminate values.
class NCCLInfo {
 public:
  NCCLInfo() = default;
  // Virtual so the type can be deleted safely through a base pointer.
  virtual ~NCCLInfo() = default;

 public:
  // NOTE(review): the meaning of the three rank fields is inferred from their
  // names only; confirm against the code that assigns them.
  int local_rank_ = 0;      // presumably this process's rank on its node
  int global_ranks_ = 0;    // presumably the total number of ranks
  int my_global_rank_ = 0;  // presumably this process's rank across all nodes
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  ncclUniqueId nccl_id_{};         // zero-filled until a real id is assigned
  ncclComm_t comm_ = nullptr;      // no communicator until one is created
  cudaStream_t stream_ = nullptr;  // no stream until one is assigned
#endif
};

// Lazily created, process-wide holder of an NCCLInfo plus the operations that
// act on it. Obtain the shared object through GetInstance(); copying and
// assignment are disabled via DISABLE_COPY_AND_ASSIGN.
//
// All non-inline member functions are only declared here; their definitions
// live outside this header.
class NCCLWrapper {
 public:
  virtual ~NCCLWrapper() {}
  NCCLWrapper() {}

  void InitNCCL();
  // Takes an NCCLInfo by const reference; see the definition for which
  // fields are consumed.
  void SetNCCLId(const NCCLInfo& nccl_info);
  // Returns an NCCLInfo by value.
  NCCLInfo GetNCCLId();
  void SetRankInfo(const int local_rank, const int global_rank,
                   const int ranks);
  // NOTE(review): presumably synchronizes the variables named in `var_names`
  // (looked up in `scope`) from `root_rank`; confirm against the definition.
  void SyncVar(const int root_rank, const Scope& scope,
               const std::vector<std::string>& var_names);

  // Returns the shared instance, creating it on the first call.
  //
  // The check-then-create sequence is not synchronized, so the first call
  // must not race with other calls to GetInstance().
  static std::shared_ptr<NCCLWrapper> GetInstance() {
    if (!s_instance_) {
      s_instance_ = std::make_shared<NCCLWrapper>();
    }
    return s_instance_;
  }

 public:
  NCCLInfo nccl_info_;

 private:
  // Backing storage for GetInstance(); defined outside this header.
  static std::shared_ptr<NCCLWrapper> s_instance_;

 protected:
  // Declared here and defined elsewhere; not referenced within this header.
  static bool is_initialized_;
  DISABLE_COPY_AND_ASSIGN(NCCLWrapper);
};

}  // end namespace framework
}  // end namespace paddle