//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <mutex>   // NOLINT
#include <thread>  // NOLINT
#include <typeindex>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace platform {

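// Maps a run-time std::type_index onto the corresponding NCCL data type;
// throws for element types NCCL does not support.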
inline ncclDataType_t ToNCCLDataType(std::type_index type) {
  if (type == typeid(float)) {  // NOLINT
    return ncclFloat;
  } else if (type == typeid(double)) {  // NOLINT
    return ncclDouble;
  } else if (type == typeid(int)) {  // NOLINT
    return ncclInt;
  } else if (type == typeid(int64_t)) {  // NOLINT
    return ncclInt64;
  } else {
    PADDLE_THROW("Not supported");
  }
}

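// RAII guard that wraps a NCCL group: ncclGroupStart() on construction and
// ncclGroupEnd() on destruction, serialized through a global mutex so only
// one group can be open at a time.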
class NCCLGroupGuard {
 public:
  inline NCCLGroupGuard() {
    mutex().lock();
    PADDLE_ENFORCE(dynload::ncclGroupStart());
  }

  inline ~NCCLGroupGuard() {
    PADDLE_ENFORCE(dynload::ncclGroupEnd());
    mutex().unlock();
  }

 private:
  static std::mutex &mutex() {
    static std::mutex mtx;
    return mtx;
  }
};

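// Bundles the per-device state needed by one NCCL participant: the CUDA
// device context (and its stream) plus the communicator bound to that device.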
struct NCCLContext {
  std::unique_ptr<CUDADeviceContext> ctx_;
  ncclComm_t comm_;

  explicit NCCLContext(int dev_id)
      : ctx_(new CUDADeviceContext(CUDAPlace(dev_id))), comm_{nullptr} {}

  cudaStream_t stream() const { return ctx_->stream(); }

  int device_id() const {
    return boost::get<platform::CUDAPlace>(ctx_->GetPlace()).device;
  }

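  // Creates one communicator per place via ncclCommInitAll and stores each in
  // the context of the matching device; assumes `places` and `contexts` cover
  // exactly the same set of devices.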
  static void InitNCCLContext(std::unordered_map<int, NCCLContext> *contexts,
                              const std::vector<platform::Place> &places) {
    std::vector<ncclComm_t> comms;
    std::vector<int> devs;
    comms.resize(contexts->size());
    devs.reserve(contexts->size());

    for (auto &p : places) {
      devs.push_back(boost::get<platform::CUDAPlace>(p).device);
    }

    PADDLE_ENFORCE(platform::dynload::ncclCommInitAll(
        &comms[0], static_cast<int>(contexts->size()), &devs[0]));

    int i = 0;
    for (auto &dev_id : devs) {
      contexts->at(dev_id).comm_ = comms[i++];
    }
  }
};

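// Owns one NCCLContext per device, remembering the construction order in
// order_; when more than one device is used, the communicators are created
// together with ncclCommInitAll.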
struct NCCLContextMap {
  std::unordered_map<int, NCCLContext> contexts_;
  std::vector<int> order_;

  explicit NCCLContextMap(const std::vector<platform::Place> &places) {
    PADDLE_ENFORCE(!places.empty());
    order_.reserve(places.size());
    for (auto &p : places) {
      int dev_id = boost::get<CUDAPlace>(p).device;
      order_.emplace_back(dev_id);
      contexts_.emplace(dev_id, NCCLContext(dev_id));
    }
    PADDLE_ENFORCE_EQ(
        order_.size(), contexts_.size(),
        "NCCLContextMap does not support duplicate devices");

    if (places.size() > 1) {
      std::vector<ncclComm_t> comms;
      comms.resize(order_.size());

      PADDLE_ENFORCE(platform::dynload::ncclCommInitAll(
          &comms[0], static_cast<int>(order_.size()), &order_[0]));

      int i = 0;
      for (auto &dev_id : order_) {
        contexts_.at(dev_id).comm_ = comms[i++];
      }
    }
  }

  CUDADeviceContext *DevCtx(int dev_id) const { return at(dev_id).ctx_.get(); }

  CUDADeviceContext *DevCtx(platform::Place p) const {
    return DevCtx(boost::get<CUDAPlace>(p).device);
  }

  const NCCLContext &at(platform::Place p) const {
    return this->at(boost::get<CUDAPlace>(p).device);
  }

  const NCCLContext &at(int dev_id) const { return contexts_.at(dev_id); }

  void WaitAll() {
    for (auto &p : contexts_) {
      p.second.ctx_->Wait();
    }
  }
};
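
// A minimal usage sketch (assumes a build with NCCL and, for the two-place
// example below, at least two visible GPUs):
//
//   std::vector<platform::Place> places{platform::CUDAPlace(0),
//                                       platform::CUDAPlace(1)};
//   platform::NCCLContextMap nccl_ctxs(places);
//   {
//     platform::NCCLGroupGuard guard;  // batch the per-device NCCL calls
//     for (int dev_id : nccl_ctxs.order_) {
//       const auto &ctx = nccl_ctxs.at(dev_id);
//       // e.g. issue ncclAllReduce(..., ctx.comm_, ctx.stream()) here
//     }
//   }
//   nccl_ctxs.WaitAll();  // block until every device stream has drained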

}  // namespace platform
}  // namespace paddle