Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
9b3f48d7
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
9b3f48d7
编写于
6月 21, 2018
作者:
Y
Yu Yang
提交者:
GitHub
6月 21, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #11616 from chengduoZH/fix_parallel_exe
Enhance Parallel Executor stable
上级
bcea248b
c99fca5f
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
88 addition
and
24 deletion
+88
-24
paddle/fluid/framework/details/broadcast_op_handle.cc
paddle/fluid/framework/details/broadcast_op_handle.cc
+48
-20
paddle/fluid/framework/details/multi_devices_graph_builder.cc
...le/fluid/framework/details/multi_devices_graph_builder.cc
+2
-3
paddle/fluid/framework/details/op_handle_base.cc
paddle/fluid/framework/details/op_handle_base.cc
+23
-0
paddle/fluid/framework/details/op_handle_base.h
paddle/fluid/framework/details/op_handle_base.h
+4
-0
paddle/fluid/framework/details/reduce_op_handle.cc
paddle/fluid/framework/details/reduce_op_handle.cc
+3
-1
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+8
-0
未找到文件。
paddle/fluid/framework/details/broadcast_op_handle.cc
浏览文件 @
9b3f48d7
...
@@ -73,6 +73,9 @@ void BroadcastOpHandle::RunImpl() {
...
@@ -73,6 +73,9 @@ void BroadcastOpHandle::RunImpl() {
int
root_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
in_tensor
.
place
()).
device
;
int
root_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
in_tensor
.
place
()).
device
;
std
::
vector
<
std
::
function
<
void
()
>>
broadcast_calls
;
std
::
vector
<
std
::
function
<
void
()
>>
broadcast_calls
;
int
type
=
platform
::
ToNCCLDataType
(
in_tensor
.
type
());
size_t
numel
=
static_cast
<
size_t
>
(
in_tensor
.
numel
());
for
(
auto
out_var_handle
:
out_var_handles
)
{
for
(
auto
out_var_handle
:
out_var_handles
)
{
Variable
*
out_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx_
)
Variable
*
out_var
=
var_scopes
.
at
(
out_var_handle
->
scope_idx_
)
->
FindVar
(
out_var_handle
->
name_
);
->
FindVar
(
out_var_handle
->
name_
);
...
@@ -87,13 +90,11 @@ void BroadcastOpHandle::RunImpl() {
...
@@ -87,13 +90,11 @@ void BroadcastOpHandle::RunImpl() {
send_recv_buffer
=
const_cast
<
void
*>
(
in_tensor
.
data
<
void
>
());
send_recv_buffer
=
const_cast
<
void
*>
(
in_tensor
.
data
<
void
>
());
out_handle
=
out_var_handle
;
out_handle
=
out_var_handle
;
}
else
{
}
else
{
send_recv_buffer
=
send_recv_buffer
=
VariableVisitor
::
GetMutableTensor
(
out_var
)
VariableVisitor
::
GetMutableTensor
(
out_var
).
mutable_data
(
.
Resize
(
in_tensor
.
dims
())
out_var_handle
->
place_
);
.
mutable_data
(
out_var_handle
->
place_
);
}
}
int
type
=
platform
::
ToNCCLDataType
(
in_tensor
.
type
());
size_t
numel
=
static_cast
<
size_t
>
(
in_tensor
.
numel
());
broadcast_calls
.
emplace_back
(
broadcast_calls
.
emplace_back
(
[
send_recv_buffer
,
numel
,
type
,
root_id
,
&
nccl_ctx
]
{
[
send_recv_buffer
,
numel
,
type
,
root_id
,
&
nccl_ctx
]
{
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclBcast
(
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclBcast
(
...
@@ -102,23 +103,50 @@ void BroadcastOpHandle::RunImpl() {
...
@@ -102,23 +103,50 @@ void BroadcastOpHandle::RunImpl() {
});
});
}
}
this
->
RunAndRecordEvent
([
&
]
{
// FIXME(zcd): a temporary fix for some language model that has sparse
{
// parameter.
platform
::
NCCLGroupGuard
guard
;
bool
use_mutex
=
true
;
for
(
auto
&
call
:
broadcast_calls
)
{
if
(
in_var
->
IsType
<
paddle
::
framework
::
SelectedRows
>
())
{
call
();
use_mutex
=
false
;
}
if
(
use_mutex
)
{
this
->
RunAndRecordEvent
([
&
]
{
{
platform
::
NCCLGroupGuard
guard
;
for
(
auto
&
call
:
broadcast_calls
)
{
call
();
}
}
}
}
if
(
!
out_handle
->
IsTheSameVar
(
*
in_var_handle
))
{
if
(
!
out_handle
->
IsTheSameVar
(
*
in_var_handle
))
{
auto
out_var
=
var_scopes
.
at
(
in_var_handle
->
scope_idx_
)
auto
out_var
=
var_scopes
.
at
(
in_var_handle
->
scope_idx_
)
->
FindVar
(
out_var_handles
[
0
]
->
name_
);
->
FindVar
(
out_var_handles
[
0
]
->
name_
);
paddle
::
framework
::
TensorCopy
(
paddle
::
framework
::
TensorCopy
(
in_tensor
,
in_var_handle
->
place_
,
in_tensor
,
in_var_handle
->
place_
,
*
(
dev_ctxes_
.
at
(
in_var_handle
->
place_
)),
*
(
dev_ctxes_
.
at
(
in_var_handle
->
place_
)),
&
VariableVisitor
::
GetMutableTensor
(
out_var
));
&
VariableVisitor
::
GetMutableTensor
(
out_var
));
}
}
});
});
}
else
{
this
->
RunAndRecordEventNoMutex
([
&
]
{
{
platform
::
NCCLGroupGuard
guard
;
for
(
auto
&
call
:
broadcast_calls
)
{
call
();
}
}
if
(
!
out_handle
->
IsTheSameVar
(
*
in_var_handle
))
{
auto
out_var
=
var_scopes
.
at
(
in_var_handle
->
scope_idx_
)
->
FindVar
(
out_var_handles
[
0
]
->
name_
);
paddle
::
framework
::
TensorCopy
(
in_tensor
,
in_var_handle
->
place_
,
*
(
dev_ctxes_
.
at
(
in_var_handle
->
place_
)),
&
VariableVisitor
::
GetMutableTensor
(
out_var
));
}
});
}
#else
#else
PADDLE_THROW
(
"CUDA is not enabled."
);
PADDLE_THROW
(
"CUDA is not enabled."
);
#endif
#endif
...
...
paddle/fluid/framework/details/multi_devices_graph_builder.cc
浏览文件 @
9b3f48d7
...
@@ -351,7 +351,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(SSAGraph *result,
...
@@ -351,7 +351,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(SSAGraph *result,
auto
&
prev_grad
=
vars
.
back
();
auto
&
prev_grad
=
vars
.
back
();
op_handle
->
AddInput
(
prev_grad
.
get
());
op_handle
->
AddInput
(
prev_grad
.
get
());
auto
var
=
new
VarHandle
(
vars
.
size
()
-
1
,
i
,
og
,
p
);
auto
var
=
new
VarHandle
(
vars
.
size
(),
i
,
og
,
p
);
vars
.
emplace_back
(
var
);
vars
.
emplace_back
(
var
);
op_handle
->
AddOutput
(
var
);
op_handle
->
AddOutput
(
var
);
}
}
...
@@ -447,8 +447,7 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(SSAGraph *result,
...
@@ -447,8 +447,7 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(SSAGraph *result,
op_handle
->
AddInput
(
prev_grad
.
get
());
op_handle
->
AddInput
(
prev_grad
.
get
());
}
}
auto
&
vars
=
result
->
vars_
[
dst_dev_id
][
og
];
auto
&
vars
=
result
->
vars_
[
dst_dev_id
][
og
];
auto
var
=
auto
var
=
new
VarHandle
(
vars
.
size
(),
dst_dev_id
,
og
,
places_
[
dst_dev_id
]);
new
VarHandle
(
vars
.
size
()
-
1
,
dst_dev_id
,
og
,
places_
[
dst_dev_id
]);
vars
.
emplace_back
(
var
);
vars
.
emplace_back
(
var
);
op_handle
->
AddOutput
(
var
);
op_handle
->
AddOutput
(
var
);
return
var
;
return
var
;
...
...
paddle/fluid/framework/details/op_handle_base.cc
浏览文件 @
9b3f48d7
...
@@ -139,6 +139,29 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) {
...
@@ -139,6 +139,29 @@ void OpHandleBase::RunAndRecordEvent(const std::function<void()> &callback) {
#endif
#endif
}
}
// Invokes `callback` and, when this handle holds CUDA events, records them
// through CUDADeviceContext::RecordEventNoMutex.
//
// With CUDA enabled and a non-empty events_, the callback is wrapped once per
// entry of dev_ctxes_. Each wrapper passes the previously built callable to
// that entry's CUDADeviceContext::RecordEventNoMutex, together with the event
// stored in events_ under the entry's CUDA device id. Invoking the outermost
// wrapper therefore reaches the original `callback` exactly once, at the
// innermost level of the chain.
//
// Without CUDA support, or when events_ is empty, `callback` is simply called.
//
// FIXME(zcd): temporary workaround for language models with sparse parameters.
void OpHandleBase::RunAndRecordEventNoMutex(
    const std::function<void()> &callback) {
#ifdef PADDLE_WITH_CUDA
  // No events to record: just run the work.
  if (events_.empty()) {
    callback();
    return;
  }

  // Use event
  std::function<void()> chained = callback;
  for (auto &ctx_entry : dev_ctxes_) {
    // Capture the previous callable by value so each wrapper owns its inner
    // link of the chain.
    chained = [chained, ctx_entry, this]() {
      auto *cuda_ctx =
          static_cast<platform::CUDADeviceContext *>(ctx_entry.second);
      const auto device_id =
          boost::get<platform::CUDAPlace>(ctx_entry.first).device;
      cuda_ctx->RecordEventNoMutex(events_.at(device_id), chained);
    };
  }
  chained();
#else
  callback();
#endif
}
void
OpHandleBase
::
RunAndRecordEvent
(
platform
::
Place
p
,
void
OpHandleBase
::
RunAndRecordEvent
(
platform
::
Place
p
,
const
std
::
function
<
void
()
>
&
callback
)
{
const
std
::
function
<
void
()
>
&
callback
)
{
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/framework/details/op_handle_base.h
浏览文件 @
9b3f48d7
...
@@ -85,6 +85,10 @@ class OpHandleBase {
...
@@ -85,6 +85,10 @@ class OpHandleBase {
protected:
protected:
void
RunAndRecordEvent
(
const
std
::
function
<
void
()
>
&
callback
);
void
RunAndRecordEvent
(
const
std
::
function
<
void
()
>
&
callback
);
// FIXME(zcd): A temporary fix for some language models that have sparse
// parameters.
void
RunAndRecordEventNoMutex
(
const
std
::
function
<
void
()
>
&
callback
);
void
RunAndRecordEvent
(
platform
::
Place
p
,
void
RunAndRecordEvent
(
platform
::
Place
p
,
const
std
::
function
<
void
()
>
&
callback
);
const
std
::
function
<
void
()
>
&
callback
);
...
...
paddle/fluid/framework/details/reduce_op_handle.cc
浏览文件 @
9b3f48d7
...
@@ -80,7 +80,9 @@ void ReduceOpHandle::RunImpl() {
...
@@ -80,7 +80,9 @@ void ReduceOpHandle::RunImpl() {
}
}
if
(
pre_in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
if
(
pre_in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
this
->
RunAndRecordEvent
([
&
]
{
// FIXME(zcd): A temporary fix for some language models that have sparse
// parameters.
this
->
RunAndRecordEventNoMutex
([
&
]
{
std
::
vector
<
const
SelectedRows
*>
in_selected_rows
=
std
::
vector
<
const
SelectedRows
*>
in_selected_rows
=
GetInputValues
<
SelectedRows
>
(
in_var_handles
,
var_scopes
);
GetInputValues
<
SelectedRows
>
(
in_var_handles
,
var_scopes
);
GatherSelectedRows
(
in_selected_rows
,
in_places
,
dev_ctxes_
,
t_out_p
,
GatherSelectedRows
(
in_selected_rows
,
in_places
,
dev_ctxes_
,
t_out_p
,
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
9b3f48d7
...
@@ -106,6 +106,14 @@ class CUDADeviceContext : public DeviceContext {
...
@@ -106,6 +106,14 @@ class CUDADeviceContext : public DeviceContext {
PADDLE_ENFORCE
(
cudaEventRecord
(
ev
,
stream_
));
PADDLE_ENFORCE
(
cudaEventRecord
(
ev
,
stream_
));
}
}
// FIXME(zcd): Temporary workaround for language models that have sparse
// parameters.
//
// Runs `callback` and then records `ev` on this context's stream_.
// This method itself acquires no lock around either step.
//
// ev:       CUDA event to record after the callback returns.
// callback: callable invoked with no arguments before the event is recorded.
template <typename Callback>
void RecordEventNoMutex(cudaEvent_t ev, Callback callback) {
  callback();
  PADDLE_ENFORCE(cudaEventRecord(ev, stream_));
}
private:
private:
CUDAPlace
place_
;
CUDAPlace
place_
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录