未验证提交 129859e7 · 作者: qingqing01 · 提交者: GitHub

Support data type int64 in NCCL. (#9818)

上级 1d88ebe4
...@@ -14,8 +14,9 @@ ...@@ -14,8 +14,9 @@
#pragma once #pragma once
#include <thread> #include <thread> // NOLINT
#include <typeindex> #include <typeindex>
#include <vector>
#include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/dynload/nccl.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
...@@ -29,6 +30,8 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) { ...@@ -29,6 +30,8 @@ inline ncclDataType_t ToNCCLDataType(std::type_index type) {
return ncclDouble; return ncclDouble;
} else if (type == typeid(int)) { // NOLINT } else if (type == typeid(int)) { // NOLINT
return ncclInt; return ncclInt;
} else if (type == typeid(int64_t)) { // NOLINT
return ncclInt64;
} else { } else {
PADDLE_THROW("Not supported"); PADDLE_THROW("Not supported");
} }
...@@ -66,23 +69,23 @@ struct NCCLContext { ...@@ -66,23 +69,23 @@ struct NCCLContext {
return boost::get<platform::CUDAPlace>(ctx_->GetPlace()).device; return boost::get<platform::CUDAPlace>(ctx_->GetPlace()).device;
} }
static void InitNCCLContext(std::unordered_map<int, NCCLContext> &contexts, static void InitNCCLContext(std::unordered_map<int, NCCLContext> *contexts,
const std::vector<platform::Place> &places) { const std::vector<platform::Place> &places) {
std::vector<ncclComm_t> comms; std::vector<ncclComm_t> comms;
std::vector<int> devs; std::vector<int> devs;
comms.resize(contexts.size()); comms.resize(contexts->size());
devs.reserve(contexts.size()); devs.reserve(contexts->size());
for (auto &p : places) { for (auto &p : places) {
devs.push_back(boost::get<platform::CUDAPlace>(p).device); devs.push_back(boost::get<platform::CUDAPlace>(p).device);
} }
PADDLE_ENFORCE(platform::dynload::ncclCommInitAll( PADDLE_ENFORCE(platform::dynload::ncclCommInitAll(
&comms[0], static_cast<int>(contexts.size()), &devs[0])); &comms[0], static_cast<int>(contexts->size()), &devs[0]));
int i = 0; int i = 0;
for (auto &dev_id : devs) { for (auto &dev_id : devs) {
contexts.at(dev_id).comm_ = comms[i++]; contexts->at(dev_id).comm_ = comms[i++];
} }
} }
}; };
...@@ -91,7 +94,7 @@ struct NCCLContextMap { ...@@ -91,7 +94,7 @@ struct NCCLContextMap {
std::unordered_map<int, NCCLContext> contexts_; std::unordered_map<int, NCCLContext> contexts_;
std::vector<int> order_; std::vector<int> order_;
NCCLContextMap(const std::vector<platform::Place> &places) { explicit NCCLContextMap(const std::vector<platform::Place> &places) {
order_.reserve(places.size()); order_.reserve(places.size());
for (auto &p : places) { for (auto &p : places) {
int dev_id = boost::get<CUDAPlace>(p).device; int dev_id = boost::get<CUDAPlace>(p).device;
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册