Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
c8de7284
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c8de7284
编写于
10月 13, 2019
作者:
Z
Zeng Jinle
提交者:
XiaoguangHu
10月 13, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix cuda dev_ctx by event, test=release/1.6 (#20559)
上级
66c084b7
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
29 additions
and
4 deletions
+29
-4
paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc
...le/fluid/framework/details/sparse_all_reduce_op_handle.cc
+27
-4
paddle/fluid/framework/details/sparse_all_reduce_op_handle.h
paddle/fluid/framework/details/sparse_all_reduce_op_handle.h
+2
-0
未找到文件。
paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc
浏览文件 @
c8de7284
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
// limitations under the License.
// limitations under the License.
#include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h"
#include <algorithm>
#include <algorithm>
#include <utility>
#include "dgc/dgc.h"
#include "dgc/dgc.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
#include "paddle/fluid/framework/details/reduce_and_gather.h"
...
@@ -41,11 +42,30 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle(
...
@@ -41,11 +42,30 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle(
}
}
}
}
// Orders this handle's GPU streams after the pooled device-context streams.
//
// For every GPU entry in dev_ctxes_, looks up the device context that
// DeviceContextPool holds for the same device. When the two contexts run on
// different CUDA streams, the per-device event from events_ is recorded on
// the pooled context's stream and the entry's stream is made to wait on it.
// The wait is enqueued on the stream via cudaStreamWaitEvent; no host-side
// synchronization call is made here.
//
// Non-GPU entries are skipped. Without PADDLE_WITH_CUDA the body is compiled
// out and the function does nothing.
//
// NOTE(review): presumably the pooled context is the one producing the input
// variables and dev_ctxes_ holds the communication contexts -- confirm
// against the base class.
void SparseAllReduceOpHandle::WaitInputVarGenerated() {
#ifdef PADDLE_WITH_CUDA
  for (auto &place_and_ctx : dev_ctxes_) {
    // Only GPU places have CUDA streams to order.
    if (!platform::is_gpu_place(place_and_ctx.first)) {
      continue;
    }

    int dev_id = boost::get<platform::CUDAPlace>(place_and_ctx.first).device;
    auto *compute_dev_ctx =
        platform::DeviceContextPool::Instance().GetByPlace(
            platform::CUDAPlace(dev_id));
    auto *dev_ctx =
        static_cast<platform::CUDADeviceContext *>(place_and_ctx.second);

    // Work on a single stream is already ordered; no event is needed.
    if (compute_dev_ctx->stream() == dev_ctx->stream()) {
      continue;
    }

    // events_.at() throws if no event was registered for this device.
    auto &event = events_.at(dev_id);
    PADDLE_ENFORCE_CUDA_SUCCESS(
        cudaEventRecord(event, compute_dev_ctx->stream()));
    PADDLE_ENFORCE_CUDA_SUCCESS(
        cudaStreamWaitEvent(dev_ctx->stream(), event, 0));
  }
#endif
}
void
SparseAllReduceOpHandle
::
RunImplEncoded
()
{
void
SparseAllReduceOpHandle
::
RunImplEncoded
()
{
platform
::
RecordEvent
record_event
(
Name
());
platform
::
RecordEvent
record_event
(
Name
());
WaitInputVarGenerated
();
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Inputs
());
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Inputs
());
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
this
->
Outputs
());
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
...
@@ -87,6 +107,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -87,6 +107,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
PADDLE_ENFORCE
(
nranks_
>
1
);
PADDLE_ENFORCE
(
nranks_
>
1
);
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
std
::
vector
<
std
::
function
<
void
()
>>
all_reduce_calls
;
std
::
vector
<
memory
::
AllocationPtr
>
allocations
;
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
local_scopes_
.
size
();
++
i
)
{
auto
&
place
=
places_
[
i
];
auto
&
place
=
places_
[
i
];
auto
&
in
=
*
ins
[
i
];
auto
&
in
=
*
ins
[
i
];
...
@@ -104,7 +126,6 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -104,7 +126,6 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
int
dev_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
).
device
;
auto
*
nccl_ctxs
=
nccl_ctxs_
->
GetRunEnvNCCLCtx
(
run_order_
,
false
);
auto
*
nccl_ctxs
=
nccl_ctxs_
->
GetRunEnvNCCLCtx
(
run_order_
,
false
);
auto
&
nccl_ctx
=
nccl_ctxs
->
at
(
dev_id
);
auto
&
nccl_ctx
=
nccl_ctxs
->
at
(
dev_id
);
auto
*
dev_ctx
=
nccl_ctxs
->
DevCtx
(
dev_id
);
auto
stream
=
nccl_ctx
.
stream
();
auto
stream
=
nccl_ctx
.
stream
();
auto
comm
=
nccl_ctx
.
comm_
;
auto
comm
=
nccl_ctx
.
comm_
;
...
@@ -112,8 +133,9 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -112,8 +133,9 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
// dgc use ncclAllGather to get all the encoded data
// dgc use ncclAllGather to get all the encoded data
// so the buffer need nranks.
// so the buffer need nranks.
int
buf_size
=
nranks_
*
encode_size
;
int
buf_size
=
nranks_
*
encode_size
;
auto
tmp_ious_data
=
memory
::
Alloc
(
*
dev_ctx
,
buf_size
);
auto
tmp_ious_data
=
memory
::
Alloc
(
place
,
buf_size
);
void
*
gather_buff
=
reinterpret_cast
<
void
*>
(
tmp_ious_data
->
ptr
());
void
*
gather_buff
=
reinterpret_cast
<
void
*>
(
tmp_ious_data
->
ptr
());
allocations
.
emplace_back
(
std
::
move
(
tmp_ious_data
));
VLOG
(
10
)
<<
"in_numel:"
<<
in_numel
<<
", out_numel:"
<<
out_numel
VLOG
(
10
)
<<
"in_numel:"
<<
in_numel
<<
", out_numel:"
<<
out_numel
<<
", nranks:"
<<
nranks_
<<
", gather_buf size:"
<<
buf_size
<<
", nranks:"
<<
nranks_
<<
", gather_buf size:"
<<
buf_size
...
@@ -126,6 +148,7 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
...
@@ -126,6 +148,7 @@ void SparseAllReduceOpHandle::RunImplEncoded() {
});
});
}
}
WaitInputVarGenerated
();
NCCLAllReduceFunc
(
all_reduce_calls
);
NCCLAllReduceFunc
(
all_reduce_calls
);
}
}
...
...
paddle/fluid/framework/details/sparse_all_reduce_op_handle.h
浏览文件 @
c8de7284
...
@@ -36,6 +36,8 @@ class SparseAllReduceOpHandle : public AllReduceOpHandle {
...
@@ -36,6 +36,8 @@ class SparseAllReduceOpHandle : public AllReduceOpHandle {
bool
is_encoded
=
false
,
int
nranks
=
-
1
);
bool
is_encoded
=
false
,
int
nranks
=
-
1
);
std
::
string
Name
()
const
override
;
std
::
string
Name
()
const
override
;
void
WaitInputVarGenerated
()
override
;
protected:
protected:
void
RunImpl
()
override
;
void
RunImpl
()
override
;
int
GetKValue
(
const
std
::
string
&
grad_name
);
int
GetKValue
(
const
std
::
string
&
grad_name
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录