Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
d8aebaf5
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d8aebaf5
编写于
10月 15, 2017
作者:
D
Dong Zhihong
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
"fix enforce error"
上级
d2be7ec3
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
35 addition
and
35 deletion
+35
-35
paddle/operators/nccl/nccl_gpu_common.cc
paddle/operators/nccl/nccl_gpu_common.cc
+18
-15
paddle/operators/nccl/nccl_gpu_common.h
paddle/operators/nccl/nccl_gpu_common.h
+4
-10
paddle/operators/nccl/nccl_ops.h
paddle/operators/nccl/nccl_ops.h
+7
-6
paddle/platform/dynload/nccl.h
paddle/platform/dynload/nccl.h
+4
-4
paddle/platform/enforce.h
paddle/platform/enforce.h
+2
-0
未找到文件。
paddle/operators/nccl/nccl_gpu_common.cc
浏览文件 @
d8aebaf5
...
...
@@ -8,27 +8,27 @@ NCCLManager::NCCLManager() {}
NCCLManager
::~
NCCLManager
()
{
for
(
auto
&
p
:
comm_table
)
{
auto
*
comm
=
p
.
second
;
auto
&
comm
=
p
.
second
;
auto
&
gpus_
=
comm
->
gpus_
;
for
(
in
t
i
=
0
;
i
<
gpus_
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
gpus_
.
size
();
++
i
)
{
int
gid
=
gpus_
[
i
];
platform
::
SetDeviceId
(
gid
);
// mapping gid to idx
int
idx
=
gid
%
gpus_
.
size
();
// wait finish
NCCL_CHECK
(
PADDLE_ENFORCE
(
cudaStreamWaitEvent
(
*
comm
->
streams_
[
idx
],
comm
->
events_
[
idx
],
0
));
NCCL_CHECK
(
cudaEventDestroy
(
comm
->
events_
[
idx
]));
PADDLE_ENFORCE
(
cudaEventDestroy
(
comm
->
events_
[
idx
]));
NCCL_CHECK
(
ncclCommDestroy
(
comm
->
comms_
[
idx
]));
PADDLE_ENFORCE
(
ncclCommDestroy
(
comm
->
comms_
[
idx
]));
}
delete
comm
;
comm
.
reset
(
nullptr
)
;
}
}
Communicator
*
NCCLManager
::
GetCommunicator
(
const
std
::
vector
<
int
>&
gpus
)
const
{
Communicator
*
NCCLManager
::
GetCommunicator
(
const
std
::
vector
<
int
>&
gpus
)
{
std
::
string
key
;
for
(
auto
&
id
:
gpus
)
{
key
+=
std
::
to_string
(
id
);
...
...
@@ -37,21 +37,24 @@ Communicator* NCCLManager::GetCommunicator(const std::vector<int>& gpus) const {
std
::
mutex
mu
;
std
::
lock_guard
<
std
::
mutex
>
lk
(
mu
);
auto
*
comm
=
comm_table
[
key
];
if
(
comm
==
nullptr
)
{
comm
=
new
Communicator
(
gpus
.
size
());
NCCL_CHECK
(
ncclCommInitAll
(
comm
->
comms_
.
data
(),
gpus
.
size
(),
gpus
.
data
()));
auto
it
=
comm_table
.
find
(
key
);
if
(
it
->
second
==
nullptr
)
{
auto
*
comm
=
new
Communicator
(
gpus
);
PADDLE_ENFORCE
(
ncclCommInitAll
(
comm
->
comms_
.
data
(),
gpus
.
size
(),
gpus
.
data
()));
for
(
size_t
i
=
0
;
i
<
gpus
.
size
();
++
i
)
{
platform
::
SetDeviceId
(
gpus
[
i
]);
// block wait
NCCL_CHECK
(
cudaEventCreateWithFlags
(
&
events_
[
i
],
cudaEventBlockingSync
|
cudaEventDisableTiming
));
PADDLE_ENFORCE
(
cudaEventCreateWithFlags
(
&
comm
->
events_
[
i
],
cudaEventBlockingSync
|
cudaEventDisableTiming
));
}
comm_table
[
key
]
=
comm
;
comm_table
[
key
]
.
reset
(
comm
)
;
}
return
comm
;
return
comm
_table
[
key
].
get
()
;
}
}
// namespace operators
...
...
paddle/operators/nccl/nccl_gpu_common.h
浏览文件 @
d8aebaf5
#pragma once
#include <nccl.h>
#include <algorithm>
#include <condition_variable>
...
...
@@ -10,17 +9,11 @@
#include <vector>
#include "paddle/platform/device_context.h"
#include "paddle/platform/enforce.h"
namespace
paddle
{
namespace
platform
{
#define NCCL_CHECK(condition) \
do { \
ncclResult_t ret = (condition); \
PADDLE_ENFORCE(ret == ncclSuccess, "Error invoking NCCL: ", __FILE__, \
__LINE__, ncclGetErrorString(ret)); \
} while (0)
class
WaitGroup
{
public:
inline
void
Add
(
int
n
)
{
...
...
@@ -101,7 +94,7 @@ class NCCLManager {
~
NCCLManager
();
// for each card only have one communicator
Communicator
*
GetCommunicator
(
const
std
::
vector
<
int
>&
gpus
)
const
;
Communicator
*
GetCommunicator
(
const
std
::
vector
<
int
>&
gpus
);
private:
// // the gpu id list available. Note that only support
...
...
@@ -109,7 +102,8 @@ class NCCLManager {
// std::vector<int> _gpu_worlds;
// communicator list
std
::
unordered_map
<
std
::
string
/* key*/
,
Communicator
*>
comm_table
;
std
::
unordered_map
<
std
::
string
/* key*/
,
std
::
unique_ptr
<
Communicator
>>
comm_table
;
};
}
// namespace operators
...
...
paddle/operators/nccl/nccl_ops.h
浏览文件 @
d8aebaf5
...
...
@@ -54,14 +54,15 @@ class NCCLAllReduceKernel : public framework::OpKernel {
comm
->
streams_
[
idx
]
=
stream
;
for
(
size_t
i
=
0
;
i
<
ins
.
size
();
++
i
)
{
NCCL_CHECK
(
ncclAllReduce
(
ins
[
i
]
->
data
<
T
>
(),
outs
[
i
]
->
mutable_data
<
T
>
(),
outs
[
i
]
->
numel
()
*
sizeof
(
T
),
NCCLTypeWrapper
<
T
>::
type
,
op_type
,
&
comm
->
comms_
[
idx
],
comm
->
streams_
[
idx
]));
NCCL_CHECK
(
cudaEventRecord
(
comm
->
events_
[
idx
],
*
comms_
->
streams_
[
idx
]));
PADDLE_ENFORCE
(
ncclAllReduce
(
ins
[
i
]
->
data
<
T
>
(),
outs
[
i
]
->
mutable_data
<
T
>
(),
outs
[
i
]
->
numel
()
*
sizeof
(
T
),
NCCLTypeWrapper
<
T
>::
type
,
op_type
,
&
comm
->
comms_
[
idx
],
comm
->
streams_
[
idx
]));
PADDLE_ENFORCE
(
cudaEventRecord
(
comm
->
events_
[
idx
],
*
comms_
->
streams_
[
idx
]));
// wait finish
NCCL_CHECK
(
PADDLE_ENFORCE
(
cudaStreamWaitEvent
(
comm
->
streams_
[
idx
],
comm
->
events_
[
idx
],
0
));
}
...
...
paddle/platform/dynload/nccl.h
浏览文件 @
d8aebaf5
...
...
@@ -30,13 +30,13 @@ extern void* nccl_dso_handle;
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
ncclResult_t operator()(Args... args) {
\
typedef ncclResult_t (*ncclFunc)(Args...);
\
auto operator()(Args... args) -> decltype(__name(args...)) {
\
using nccl_func = decltype(__name(args...)) (*)(Args...);
\
std::call_once(nccl_dso_flag, \
paddle::platform::dynload::GetNcclDsoHandle, \
&nccl_dso_handle); \
void* p_##__name = dlsym(nccl_dso_handle, #__name); \
return reinterpret_cast<nccl
Func>(p_##__name)(args...);
\
return reinterpret_cast<nccl
_func>(p_##__name)(args...);
\
} \
}; \
extern DynLoad__##__name __name
...
...
@@ -65,7 +65,7 @@ extern void* nccl_dso_handle;
__macro(ncclReduce); \
__macro(ncclGetErrorString);
NCCL_RAND_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_NCCL_WRAP
)
;
NCCL_RAND_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_NCCL_WRAP
)
}
// namespace dynload
}
// namespace platform
...
...
paddle/platform/enforce.h
浏览文件 @
d8aebaf5
...
...
@@ -29,6 +29,8 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#endif
#include <glog/logging.h>
#ifdef PADDLE_WITH_CUDA
#include "paddle/platform/dynload/cublas.h"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录