Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
f768fbf7
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f768fbf7
编写于
2月 26, 2019
作者:
Q
Qiao Longfei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support multi graph
test=develop
上级
ff01d705
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
50 addition
and
21 deletion
+50
-21
paddle/fluid/framework/details/async_ssa_graph_executor.cc
paddle/fluid/framework/details/async_ssa_graph_executor.cc
+3
-3
paddle/fluid/framework/details/async_ssa_graph_executor.h
paddle/fluid/framework/details/async_ssa_graph_executor.h
+3
-3
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+30
-10
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+1
-1
paddle/fluid/operators/reader/blocking_queue.h
paddle/fluid/operators/reader/blocking_queue.h
+1
-0
paddle/fluid/operators/reader/create_py_reader_op.cc
paddle/fluid/operators/reader/create_py_reader_op.cc
+4
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-1
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+7
-2
未找到文件。
paddle/fluid/framework/details/async_ssa_graph_executor.cc
浏览文件 @
f768fbf7
...
@@ -20,12 +20,12 @@ namespace details {
...
@@ -20,12 +20,12 @@ namespace details {
AsyncSSAGraphExecutor
::
AsyncSSAGraphExecutor
(
AsyncSSAGraphExecutor
::
AsyncSSAGraphExecutor
(
const
ExecutionStrategy
&
strategy
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
ExecutionStrategy
&
strategy
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
ir
::
Graph
*
graph
)
const
std
::
vector
<
platform
::
Place
>
&
places
,
std
::
vector
<
ir
::
Graph
*>
graphs
)
:
strategy_
(
std
::
move
(
strategy
)),
:
strategy_
(
std
::
move
(
strategy
)),
local_scopes_
(
std
::
move
(
local_scopes
)),
local_scopes_
(
std
::
move
(
local_scopes
)),
pool_
(
places
.
size
()
>=
2
?
new
::
ThreadPool
(
places
.
size
())
:
nullptr
),
pool_
(
places
.
size
()
>=
2
?
new
::
ThreadPool
(
places
.
size
())
:
nullptr
),
places_
(
std
::
move
(
places
)),
places_
(
std
::
move
(
places
)),
graph
_
(
graph
)
{
graph
s_
(
std
::
move
(
graphs
)
)
{
VLOG
(
3
)
<<
"build AsyncSSAGraphExecutor"
;
VLOG
(
3
)
<<
"build AsyncSSAGraphExecutor"
;
PADDLE_ENFORCE_EQ
(
places_
.
size
(),
local_scopes_
.
size
());
PADDLE_ENFORCE_EQ
(
places_
.
size
(),
local_scopes_
.
size
());
...
@@ -37,7 +37,7 @@ AsyncSSAGraphExecutor::AsyncSSAGraphExecutor(
...
@@ -37,7 +37,7 @@ AsyncSSAGraphExecutor::AsyncSSAGraphExecutor(
<<
" to run the operators of the graph on each device."
;
<<
" to run the operators of the graph on each device."
;
for
(
size_t
i
=
0
;
i
<
places
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
places
.
size
();
++
i
)
{
executors_
.
emplace_back
(
new
details
::
ThreadedSSAGraphExecutor
(
executors_
.
emplace_back
(
new
details
::
ThreadedSSAGraphExecutor
(
strategy_
,
{
local_scopes_
[
i
]},
{
places_
[
i
]},
graph
_
));
strategy_
,
{
local_scopes_
[
i
]},
{
places_
[
i
]},
graph
s_
[
i
]
));
}
}
}
}
...
...
paddle/fluid/framework/details/async_ssa_graph_executor.h
浏览文件 @
f768fbf7
...
@@ -29,9 +29,9 @@ class AsyncSSAGraphExecutor : public SSAGraphExecutor {
...
@@ -29,9 +29,9 @@ class AsyncSSAGraphExecutor : public SSAGraphExecutor {
AsyncSSAGraphExecutor
(
const
ExecutionStrategy
&
strategy
,
AsyncSSAGraphExecutor
(
const
ExecutionStrategy
&
strategy
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
ir
::
Graph
*
graph
);
std
::
vector
<
ir
::
Graph
*>
graphs
);
~
AsyncSSAGraphExecutor
()
final
=
default
;
~
AsyncSSAGraphExecutor
()
final
=
default
;
const
ir
::
Graph
&
Graph
()
const
override
{
return
*
graph
_
;
}
const
ir
::
Graph
&
Graph
()
const
override
{
return
*
graph
s_
[
0
]
;
}
FeedFetchList
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
)
override
;
FeedFetchList
Run
(
const
std
::
vector
<
std
::
string
>
&
fetch_tensors
)
override
;
...
@@ -40,7 +40,7 @@ class AsyncSSAGraphExecutor : public SSAGraphExecutor {
...
@@ -40,7 +40,7 @@ class AsyncSSAGraphExecutor : public SSAGraphExecutor {
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
unique_ptr
<::
ThreadPool
>
pool_
{
nullptr
};
std
::
unique_ptr
<::
ThreadPool
>
pool_
{
nullptr
};
std
::
vector
<
platform
::
Place
>
places_
;
std
::
vector
<
platform
::
Place
>
places_
;
ir
::
Graph
*
graph
_
;
std
::
vector
<
ir
::
Graph
*>
graphs
_
;
std
::
vector
<
std
::
unique_ptr
<
details
::
ThreadedSSAGraphExecutor
>>
executors_
;
std
::
vector
<
std
::
unique_ptr
<
details
::
ThreadedSSAGraphExecutor
>>
executors_
;
ExceptionHolder
exception_holder_
;
ExceptionHolder
exception_holder_
;
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
f768fbf7
...
@@ -188,7 +188,7 @@ ParallelExecutor::ParallelExecutor(
...
@@ -188,7 +188,7 @@ ParallelExecutor::ParallelExecutor(
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
ExecutionStrategy
&
exec_strategy
,
const
BuildStrategy
&
build_strategy
,
const
ExecutionStrategy
&
exec_strategy
,
const
BuildStrategy
&
build_strategy
,
ir
::
Graph
*
graph
)
std
::
vector
<
ir
::
Graph
*>
graphs
)
:
member_
(
new
ParallelExecutorPrivate
(
places
))
{
:
member_
(
new
ParallelExecutorPrivate
(
places
))
{
member_
->
global_scope_
=
scope
;
member_
->
global_scope_
=
scope
;
member_
->
use_cuda_
=
exec_strategy
.
use_cuda_
;
member_
->
use_cuda_
=
exec_strategy
.
use_cuda_
;
...
@@ -222,6 +222,8 @@ ParallelExecutor::ParallelExecutor(
...
@@ -222,6 +222,8 @@ ParallelExecutor::ParallelExecutor(
PADDLE_ENFORCE
(
!
member_
->
use_cuda_
,
PADDLE_ENFORCE
(
!
member_
->
use_cuda_
,
"gpu mode does not support async_mode_ now!"
);
"gpu mode does not support async_mode_ now!"
);
}
}
ir
::
Graph
*
graph
=
graphs
[
0
];
std
::
unique_ptr
<
ir
::
Graph
>
temp_owned_graph
(
graph
);
std
::
unique_ptr
<
ir
::
Graph
>
temp_owned_graph
(
graph
);
// FIXME(Yancey1989): parallel graph mode get better performance
// FIXME(Yancey1989): parallel graph mode get better performance
...
@@ -262,17 +264,26 @@ ParallelExecutor::ParallelExecutor(
...
@@ -262,17 +264,26 @@ ParallelExecutor::ParallelExecutor(
if
(
member_
->
local_scopes_
.
size
()
!=
1
&&
local_scopes
.
empty
())
{
if
(
member_
->
local_scopes_
.
size
()
!=
1
&&
local_scopes
.
empty
())
{
BCastParamsToDevices
(
bcast_vars
);
BCastParamsToDevices
(
bcast_vars
);
}
}
// Startup Program has been run. All local scopes has correct parameters.
// Startup Program has been run. All local scopes has correct parameters.
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp
// ncclOp
std
::
vector
<
ir
::
Graph
*>
async_graphs
(
places
.
size
());
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
build_strategy
.
async_mode_
&&
!
build_strategy
.
is_distribution_
)
{
if
(
build_strategy
.
async_mode_
&&
!
build_strategy
.
is_distribution_
)
{
VLOG
(
3
)
<<
"use local async mode"
;
VLOG
(
3
)
<<
"use local async mode"
;
temp_owned_graph
=
build_strategy
.
Apply
(
temp_owned_graph
=
std
::
move
(
temp_owned_graph
),
{
member_
->
places_
[
0
]},
loss_var_name
,
build_strategy
.
Apply
(
std
::
move
(
temp_owned_graph
),
{
member_
->
places_
[
0
]},
{
member_
->
local_scopes_
[
0
]},
member_
->
nranks_
,
member_
->
use_cuda_
,
loss_var_name
,
{
member_
->
local_scopes_
[
0
]},
1
,
member_
->
nccl_ctxs_
.
get
());
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
for
(
int
i
=
1
;
i
<
member_
->
places_
.
size
();
++
i
)
{
std
::
unique_ptr
<
ir
::
Graph
>
temp_graph
(
graphs
[
i
]);
temp_graph
=
build_strategy
.
Apply
(
std
::
move
(
temp_graph
),
{
member_
->
places_
[
i
]},
loss_var_name
,
{
member_
->
local_scopes_
[
i
]},
1
,
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
async_graphs
[
i
]
=
temp_graph
.
release
();
}
}
else
{
}
else
{
temp_owned_graph
=
build_strategy
.
Apply
(
temp_owned_graph
=
build_strategy
.
Apply
(
std
::
move
(
temp_owned_graph
),
member_
->
places_
,
loss_var_name
,
std
::
move
(
temp_owned_graph
),
member_
->
places_
,
loss_var_name
,
...
@@ -284,7 +295,14 @@ ParallelExecutor::ParallelExecutor(
...
@@ -284,7 +295,14 @@ ParallelExecutor::ParallelExecutor(
VLOG
(
3
)
<<
"use local async mode"
;
VLOG
(
3
)
<<
"use local async mode"
;
temp_owned_graph
=
build_strategy
.
Apply
(
temp_owned_graph
=
build_strategy
.
Apply
(
std
::
move
(
temp_owned_graph
),
{
member_
->
places_
[
0
]},
loss_var_name
,
std
::
move
(
temp_owned_graph
),
{
member_
->
places_
[
0
]},
loss_var_name
,
{
member_
->
local_scopes_
[
0
]},
member_
->
nranks_
,
member_
->
use_cuda_
);
{
member_
->
local_scopes_
[
0
]},
1
,
member_
->
use_cuda_
);
for
(
int
i
=
1
;
i
<
member_
->
places_
.
size
();
++
i
)
{
std
::
unique_ptr
<
ir
::
Graph
>
temp_graph
(
graphs
[
i
]);
temp_graph
=
build_strategy
.
Apply
(
std
::
move
(
temp_graph
),
{
member_
->
places_
[
i
]},
loss_var_name
,
{
member_
->
local_scopes_
[
i
]},
1
,
member_
->
use_cuda_
);
async_graphs
[
i
]
=
temp_graph
.
release
();
}
}
else
{
}
else
{
temp_owned_graph
=
build_strategy
.
Apply
(
temp_owned_graph
=
build_strategy
.
Apply
(
std
::
move
(
temp_owned_graph
),
member_
->
places_
,
loss_var_name
,
std
::
move
(
temp_owned_graph
),
member_
->
places_
,
loss_var_name
,
...
@@ -304,6 +322,8 @@ ParallelExecutor::ParallelExecutor(
...
@@ -304,6 +322,8 @@ ParallelExecutor::ParallelExecutor(
graph
=
temp_owned_graph
.
release
();
graph
=
temp_owned_graph
.
release
();
}
}
async_graphs
[
0
]
=
graph
;
// Step 3. Create vars in each scope. Passes may also create new vars.
// Step 3. Create vars in each scope. Passes may also create new vars.
// skip control vars and empty vars
// skip control vars and empty vars
std
::
vector
<
details
::
VariableInfo
>
var_infos
;
std
::
vector
<
details
::
VariableInfo
>
var_infos
;
...
@@ -334,7 +354,7 @@ ParallelExecutor::ParallelExecutor(
...
@@ -334,7 +354,7 @@ ParallelExecutor::ParallelExecutor(
if
(
build_strategy
.
async_mode_
&&
!
build_strategy
.
is_distribution_
)
{
if
(
build_strategy
.
async_mode_
&&
!
build_strategy
.
is_distribution_
)
{
VLOG
(
3
)
<<
"use AsyncSSAGraphExecutor"
;
VLOG
(
3
)
<<
"use AsyncSSAGraphExecutor"
;
member_
->
executor_
.
reset
(
new
details
::
AsyncSSAGraphExecutor
(
member_
->
executor_
.
reset
(
new
details
::
AsyncSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
member_
->
places_
,
graph
));
exec_strategy
,
member_
->
local_scopes_
,
member_
->
places_
,
async_graphs
));
}
else
if
(
build_strategy
.
enable_parallel_graph_
)
{
}
else
if
(
build_strategy
.
enable_parallel_graph_
)
{
VLOG
(
3
)
<<
"use ParallelSSAGraphExecutor"
;
VLOG
(
3
)
<<
"use ParallelSSAGraphExecutor"
;
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
f768fbf7
...
@@ -50,7 +50,7 @@ class ParallelExecutor {
...
@@ -50,7 +50,7 @@ class ParallelExecutor {
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
ExecutionStrategy
&
exec_strategy
,
const
ExecutionStrategy
&
exec_strategy
,
const
BuildStrategy
&
build_strategy
,
const
BuildStrategy
&
build_strategy
,
ir
::
Graph
*
graph
);
std
::
vector
<
ir
::
Graph
*>
graphs
);
~
ParallelExecutor
();
~
ParallelExecutor
();
...
...
paddle/fluid/operators/reader/blocking_queue.h
浏览文件 @
f768fbf7
...
@@ -95,6 +95,7 @@ class BlockingQueue {
...
@@ -95,6 +95,7 @@ class BlockingQueue {
void
Close
()
{
void
Close
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
VLOG
(
3
)
<<
"close queue"
;
closed_
=
true
;
closed_
=
true
;
send_cv_
.
notify_all
();
send_cv_
.
notify_all
();
receive_cv_
.
notify_all
();
receive_cv_
.
notify_all
();
...
...
paddle/fluid/operators/reader/create_py_reader_op.cc
浏览文件 @
f768fbf7
...
@@ -35,7 +35,10 @@ class PyReader : public framework::FileReader {
...
@@ -35,7 +35,10 @@ class PyReader : public framework::FileReader {
~
PyReader
()
{
queue_
->
Close
();
}
~
PyReader
()
{
queue_
->
Close
();
}
void
Shutdown
()
override
{
queue_
->
Close
();
}
void
Shutdown
()
override
{
VLOG
(
3
)
<<
"PyReader shutdown!"
;
queue_
->
Close
();
}
void
Start
()
override
{
queue_
->
ReOpen
();
}
void
Start
()
override
{
queue_
->
ReOpen
();
}
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
f768fbf7
...
@@ -1230,7 +1230,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -1230,7 +1230,7 @@ All parameter, weight, gradient are variables in Paddle.
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
std
::
string
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
std
::
string
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
const
ExecutionStrategy
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
const
ExecutionStrategy
&
,
const
BuildStrategy
&
,
ir
::
Graph
*
>
())
const
BuildStrategy
&
,
std
::
vector
<
ir
::
Graph
*>
>
())
// NOTE: even we return a vec<Scope*>* to Python use reference policy.
// NOTE: even we return a vec<Scope*>* to Python use reference policy.
// We still cannot get local_scope from this vector, since the element
// We still cannot get local_scope from this vector, since the element
// of vec<Scope*> will be freed by Python GC. We can only return Scope*
// of vec<Scope*> will be freed by Python GC. We can only return Scope*
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
f768fbf7
...
@@ -177,12 +177,17 @@ class ParallelExecutor(object):
...
@@ -177,12 +177,17 @@ class ParallelExecutor(object):
# step7: init ParallelExecutor
# step7: init ParallelExecutor
# ParallelExecutor API will be deprecated, don't support parallel graph.
# ParallelExecutor API will be deprecated, don't support parallel graph.
self
.
_graph
=
core
.
Graph
(
main
.
desc
)
self
.
_graphs
=
[]
if
build_strategy
.
async_mode
:
for
_
in
range
(
cpu_num
):
self
.
_graphs
.
append
(
core
.
Graph
(
main
.
desc
))
else
:
self
.
_graphs
.
append
(
core
.
Graph
(
main
.
desc
))
self
.
executor
=
core
.
ParallelExecutor
(
self
.
executor
=
core
.
ParallelExecutor
(
places
,
persistable_vars
,
places
,
persistable_vars
,
cpt
.
to_text
(
loss_name
)
if
loss_name
else
six
.
u
(
''
),
scope
,
cpt
.
to_text
(
loss_name
)
if
loss_name
else
six
.
u
(
''
),
scope
,
local_scopes
,
exec_strategy
,
build_strategy
,
self
.
_graph
)
local_scopes
,
exec_strategy
,
build_strategy
,
self
.
_graph
s
)
self
.
scope
=
scope
self
.
scope
=
scope
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录