Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
17d62ab2
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
17d62ab2
编写于
8月 09, 2019
作者:
C
chengduo
提交者:
GitHub
8月 09, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Enhance fuse optimization op pass (#19010)
* Enhance fuse optimization op pass test=develop
上级
21440b4d
变更
11
显示空白变更内容
内联
并排
Showing
11 changed files
with
325 additions
and
112 deletions
+325
-112
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc
...framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc
+58
-35
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc
...ework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc
+2
-4
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
...work/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
+120
-12
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h
...ework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h
+7
-3
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc
.../framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc
+2
-4
python/paddle/fluid/tests/unittests/dist_save_load.py
python/paddle/fluid/tests/unittests/dist_save_load.py
+0
-1
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
...ddle/fluid/tests/unittests/parallel_executor_test_base.py
+93
-42
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+0
-1
python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py
.../paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py
+43
-6
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
...dle/fluid/tests/unittests/test_parallel_executor_mnist.py
+0
-2
python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py
...paddle/fluid/tests/unittests/test_parallel_executor_pg.py
+0
-2
未找到文件。
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc
浏览文件 @
17d62ab2
...
...
@@ -32,19 +32,62 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
return
{
"Moment1"
,
"Moment2"
,
"Beta1Pow"
,
"Beta2Pow"
};
}
void
FuseOptimizerOps
(
// Fuses the adam ops in `adam_ops` via FuseAdamOps, fuses the scale ops for
// "Beta1Pow" and "Beta2Pow" via FuseScaleOps, and then removes the control
// variables that connect the first fused scale node to the second one.
//
// Returns the node produced by FuseAdamOps.
ir::Node *FuseOptimizerOps(
    const std::unordered_map<std::string, std::vector<std::string>>
        &aux_var_set,
    const std::unordered_map<std::string, std::string> &fused_vars_name,
    const std::vector<ir::Node *> &adam_ops, ir::Graph *graph) const {
  // Both beta-power accumulators are fused with the same call shape; only the
  // lookup key differs.
  auto fuse_beta_pow_scale = [&](const char *beta_pow) {
    return FuseScaleOps(aux_var_set.at(beta_pow), fused_vars_name.at(beta_pow),
                        adam_ops, graph);
  };

  // Keep the original order: adam first, then Beta1Pow, then Beta2Pow.
  ir::Node *adam_node =
      FuseAdamOps(aux_var_set, fused_vars_name, adam_ops, graph);
  ir::Node *beta1_scale_node = fuse_beta_pow_scale("Beta1Pow");
  ir::Node *beta2_scale_node = fuse_beta_pow_scale("Beta2Pow");

  RemoveCycleDepsBetweenOpNodes(graph, beta1_scale_node, beta2_scale_node);
  return adam_node;
}
void
FuseAdamOps
(
// Removes every node that is both an output of `fused_scale1` and an input of
// `fused_scale2`. Each such node must be a control variable (enforced); it is
// detached from its first input op and from all of its output ops, and then
// removed from `graph`.
void RemoveCycleDepsBetweenOpNodes(Graph *graph, const Node *fused_scale1,
                                   const Node *fused_scale2) const {
  const std::unordered_set<Node *> scale2_inputs(fused_scale2->inputs.begin(),
                                                 fused_scale2->inputs.end());

  // Phase 1: collect the shared nodes first, so that no input/output list is
  // modified while it is being scanned.
  std::unordered_set<Node *> redundant_ctrl_vars;
  for (auto *shared_var : fused_scale1->outputs) {
    if (scale2_inputs.count(shared_var) == 0) continue;
    PADDLE_ENFORCE(shared_var->IsCtrlVar(),
                   "The dependency var only should be ctrl var.");
    redundant_ctrl_vars.insert(shared_var);
  }

  // Phase 2: unlink each collected node from both sides and delete it.
  for (auto *ctrl_var : redundant_ctrl_vars) {
    const auto is_ctrl_var = [ctrl_var](const Node *candidate) {
      return candidate == ctrl_var;
    };

    // Detach from the op that produces this node.
    PADDLE_ENFORCE(!ctrl_var->inputs.empty());
    auto *producer = ctrl_var->inputs.front();
    PADDLE_ENFORCE(producer->IsOp());
    auto &producer_outs = producer->outputs;
    producer_outs.erase(std::remove_if(producer_outs.begin(),
                                       producer_outs.end(), is_ctrl_var),
                        producer_outs.end());

    // Detach from every op that consumes this node.
    for (auto *consumer : ctrl_var->outputs) {
      auto &consumer_ins = consumer->inputs;
      consumer_ins.erase(std::remove_if(consumer_ins.begin(),
                                        consumer_ins.end(), is_ctrl_var),
                         consumer_ins.end());
    }

    graph->RemoveNode(ctrl_var);
  }
}
ir
::
Node
*
FuseAdamOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
{
...
...
@@ -102,13 +145,10 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
adam_desc
.
SetAttr
(
"min_row_size_to_use_multithread"
,
min_row_size_to_use_multithread
);
adam_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
auto
adam_node
=
graph
->
CreateOpNode
(
&
adam_desc
);
InserInputAndOutputForOptOps
(
adam_ops
,
adam_node
);
return
graph
->
CreateOpNode
(
&
adam_desc
);
}
void
FuseScaleOps
(
const
std
::
vector
<
std
::
string
>
&
beta_name
,
ir
::
Node
*
FuseScaleOps
(
const
std
::
vector
<
std
::
string
>
&
beta_name
,
const
std
::
string
&
fused_var_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
{
...
...
@@ -139,7 +179,7 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
scale_ops
.
emplace_back
(
*
scale_op_iter
);
}
PADDLE_ENFORCE_EQ
(
scale_ops
.
size
(),
beta_name
.
size
());
VLOG
(
7
)
<<
"The number of scale op is "
<<
scale_ops
.
size
()
<<
"."
;
// Check attributes
// NOTE: If a new attribute is added, the following code may need to change.
int
op_role
=
boost
::
get
<
int
>
(
...
...
@@ -175,29 +215,12 @@ class FuseAdamOpPass : public FuseOptimizerOpPass {
scale_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
auto
scale_node
=
graph
->
CreateOpNode
(
&
scale_desc
);
for
(
auto
scale_op
:
scale_ops
)
{
// set inputs
scale_node
->
inputs
.
insert
(
scale_node
->
inputs
.
begin
(),
scale_op
->
inputs
.
begin
(),
scale_op
->
inputs
.
end
());
for
(
auto
&
input
:
scale_op
->
inputs
)
{
std
::
replace
(
input
->
outputs
.
begin
(),
input
->
outputs
.
end
(),
scale_op
,
scale_node
);
}
// set outputs
scale_node
->
outputs
.
insert
(
scale_node
->
outputs
.
begin
(),
scale_op
->
outputs
.
begin
(),
scale_op
->
outputs
.
end
());
for
(
auto
&
output
:
scale_op
->
outputs
)
{
std
::
replace
(
output
->
inputs
.
begin
(),
output
->
inputs
.
end
(),
scale_op
,
scale_node
);
}
}
InsertInputAndOutputForFusedOpNode
(
scale_ops
,
graph
,
scale_node
);
// Delete scale_ops
for
(
auto
&
scale_op
:
scale_ops
)
{
graph
->
RemoveNode
(
scale_op
);
}
return
scale_node
;
}
};
}
// namespace ir
...
...
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc
浏览文件 @
17d62ab2
...
...
@@ -33,7 +33,7 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass {
}
// Fuse Momentum Ops
virtual
void
FuseOptimizerOps
(
virtual
ir
::
Node
*
FuseOptimizerOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
momentum_ops
,
ir
::
Graph
*
graph
)
const
{
...
...
@@ -77,9 +77,7 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass {
momentum_desc
.
SetAttr
(
"use_nesterov"
,
use_nesterov
);
momentum_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
auto
momentum_node
=
graph
->
CreateOpNode
(
&
momentum_desc
);
InserInputAndOutputForOptOps
(
momentum_ops
,
momentum_node
);
return
graph
->
CreateOpNode
(
&
momentum_desc
);
}
};
...
...
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc
浏览文件 @
17d62ab2
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h"
#include <algorithm>
#include <set>
#include <unordered_set>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -59,6 +60,15 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
}
return
;
}
// Bail out if any non-control variable connects one op in opt_nodes to
// another op in opt_nodes.
if
(
HasVarDepsBetweenOps
(
topo_nodes
,
opt_nodes
))
{
VLOG
(
6
)
<<
"There are interdependent variables among these optimization "
"operators, which can not be handled well at present."
;
return
;
}
result
.
Set
(
details
::
kFusedOptType
,
new
details
::
FusedOptType
);
result
.
Get
<
details
::
FusedOptType
>
(
details
::
kFusedOptType
)
=
fuse_op_type
;
if
(
!
result
.
Has
(
details
::
kProgramDescs
))
{
...
...
@@ -158,14 +168,54 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
&
result
);
// Step 5: Fuse optimizer Ops and Scale Ops
auto
*
fused_opt_node
=
FuseOptimizerOps
(
aux_var_set
,
fused_vars_name
,
opt_nodes
,
&
result
);
InsertInputAndOutputForFusedOpNode
(
opt_nodes
,
graph
,
fused_opt_node
);
// Step 6: Remove optimizer Ops
for
(
auto
&
opt_op
:
opt_nodes
)
{
graph
->
RemoveNode
(
opt_op
);
}
}
// Reports whether any two ops in `opt_nodes` are connected through a
// non-control variable.
//
// For every op in `topo_nodes`, the ops that consume one of its non-control
// output variables are recorded as its pending ops, and it is recorded as a
// preceding op of each of them. The result is true as soon as some op in
// `opt_nodes` has a preceding or pending op that is itself in `opt_nodes`.
//
// Every op in `opt_nodes` is looked up with `at()`, so an op that never shows
// up while walking `topo_nodes` raises std::out_of_range.
bool FuseOptimizerOpPass::HasVarDepsBetweenOps(
    const std::vector<Node *> &topo_nodes,
    const std::vector<Node *> &opt_nodes) const {
  std::unordered_map<Node *, std::unordered_set<Node *>> preceding_ops;
  std::unordered_map<Node *, std::unordered_set<Node *>> pending_ops;
  for (auto &op : topo_nodes) {
    // Make sure each op has an entry even when it has no neighbours.
    preceding_ops[op];
    pending_ops[op];
    for (auto &var : op->outputs) {
      // Control variables are deliberately ignored here.
      if (var->IsCtrlVar()) continue;
      for (auto &pending_op : var->outputs) {
        preceding_ops[pending_op].insert(op);
        pending_ops[op].insert(pending_op);
      }
    }
  }

  std::unordered_set<Node *> opt_node_set(opt_nodes.begin(), opt_nodes.end());

  // True when the two sets share at least one op. std::set_intersection must
  // not be used here: it requires sorted input ranges, which unordered_set
  // iteration does not provide, so membership is tested directly instead.
  auto has_var_deps = [](const std::unordered_set<Node *> &op_set1,
                         const std::unordered_set<Node *> &op_set2) -> bool {
    return std::any_of(
        op_set1.begin(), op_set1.end(),
        [&op_set2](Node *op) { return op_set2.count(op) != 0; });
  };

  for (auto opt_node : opt_node_set) {
    if (has_var_deps(preceding_ops.at(opt_node), opt_node_set)) {
      return true;
    }
    if (has_var_deps(pending_ops.at(opt_node), opt_node_set)) {
      return true;
    }
  }
  return false;
}
void
FuseOptimizerOpPass
::
GradientsFilter
(
const
std
::
vector
<
size_t
>
&
new_grad_idx
,
std
::
vector
<
Node
*>
*
opt_nodes
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_var_set
)
...
...
@@ -338,26 +388,84 @@ void FuseOptimizerOpPass::AppendAllocContinuousSpace(
op_desc
->
SetAttr
(
"check_name"
,
check_name
);
}
void
FuseOptimizerOpPass
::
InserInputAndOutputForOptOps
(
const
std
::
vector
<
ir
::
Node
*>
&
opt_nodes
,
ir
::
Node
*
opt_node
)
const
{
void
FuseOptimizerOpPass
::
InsertInputAndOutputForFusedOpNode
(
const
std
::
vector
<
ir
::
Node
*>
&
op_nodes
,
ir
::
Graph
*
graph
,
ir
::
Node
*
fused_opt_node
)
const
{
std
::
unordered_set
<
ir
::
Node
*>
inputs
;
std
::
unordered_set
<
ir
::
Node
*>
outputs
;
for
(
auto
opt_op
:
opt_nodes
)
{
// set inputs
for
(
auto
opt_op
:
op_nodes
)
{
inputs
.
insert
(
opt_op
->
inputs
.
begin
(),
opt_op
->
inputs
.
end
());
for
(
auto
&
input
:
opt_op
->
inputs
)
{
replace
(
input
->
outputs
.
begin
(),
input
->
outputs
.
end
(),
opt_op
,
opt_node
);
replace
(
input
->
outputs
.
begin
(),
input
->
outputs
.
end
(),
opt_op
,
fused_opt_node
);
}
// set outputs
outputs
.
insert
(
opt_op
->
outputs
.
begin
(),
opt_op
->
outputs
.
end
());
for
(
auto
&
output
:
opt_op
->
outputs
)
{
replace
(
output
->
inputs
.
begin
(),
output
->
inputs
.
end
(),
opt_op
,
opt_node
);
replace
(
output
->
inputs
.
begin
(),
output
->
inputs
.
end
(),
opt_op
,
fused_opt_node
);
}
}
// Remove the dependence vars between op_nodes.
std
::
unordered_set
<
ir
::
Node
*>
out_dep_vars
;
std
::
unordered_set
<
ir
::
Node
*>
not_useful_vars
;
auto
deal_with_ctrl_vars
=
[
&
out_dep_vars
,
&
not_useful_vars
,
&
fused_opt_node
](
ir
::
Node
*
ctr_var_node
)
{
PADDLE_ENFORCE_EQ
(
ctr_var_node
->
inputs
.
size
(),
1
);
if
(
ctr_var_node
->
inputs
.
front
()
==
fused_opt_node
)
{
PADDLE_ENFORCE_GT
(
ctr_var_node
->
outputs
.
size
(),
0
);
auto
output_ops
=
ctr_var_node
->
outputs
;
output_ops
.
erase
(
std
::
remove_if
(
output_ops
.
begin
(),
output_ops
.
end
(),
[
&
fused_opt_node
](
const
ir
::
Node
*
node
)
{
return
node
==
fused_opt_node
;
}),
output_ops
.
end
());
if
(
!
output_ops
.
empty
())
{
out_dep_vars
.
insert
(
ctr_var_node
);
}
not_useful_vars
.
insert
(
ctr_var_node
);
}
};
for
(
auto
*
in_node
:
inputs
)
{
if
(
in_node
->
IsCtrlVar
())
{
deal_with_ctrl_vars
(
in_node
);
}
}
for
(
auto
*
out_node
:
outputs
)
{
if
(
out_node
->
IsCtrlVar
())
{
deal_with_ctrl_vars
(
out_node
);
}
}
for
(
auto
&
node
:
not_useful_vars
)
{
if
(
inputs
.
count
(
node
))
{
inputs
.
erase
(
node
);
}
if
(
outputs
.
count
(
node
))
{
outputs
.
erase
(
node
);
}
}
for
(
auto
&
dep_var
:
out_dep_vars
)
{
if
(
not_useful_vars
.
count
(
dep_var
))
{
not_useful_vars
.
erase
(
dep_var
);
}
dep_var
->
inputs
.
clear
();
dep_var
->
inputs
.
emplace_back
(
fused_opt_node
);
}
opt_node
->
inputs
.
insert
(
opt_node
->
inputs
.
begin
(),
inputs
.
begin
(),
outputs
.
insert
(
out_dep_vars
.
begin
(),
out_dep_vars
.
end
());
fused_opt_node
->
inputs
.
insert
(
fused_opt_node
->
inputs
.
begin
(),
inputs
.
begin
(),
inputs
.
end
());
opt_node
->
outputs
.
insert
(
opt_node
->
outputs
.
begin
(),
outputs
.
begin
(),
outputs
.
end
());
fused_opt_node
->
outputs
.
insert
(
fused_opt_node
->
outputs
.
begin
(),
outputs
.
begin
(),
outputs
.
end
());
for
(
auto
&
ctrl_var_node
:
not_useful_vars
)
{
graph
->
RemoveNode
(
ctrl_var_node
);
}
}
}
// namespace ir
}
// namespace framework
...
...
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h
浏览文件 @
17d62ab2
...
...
@@ -41,7 +41,8 @@ class FuseOptimizerOpPass : public ir::Pass {
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_var_set
,
std
::
vector
<
ir
::
Node
*>
*
ops
)
const
;
void
InserInputAndOutputForOptOps
(
const
std
::
vector
<
ir
::
Node
*>
&
opt_ops
,
void
InsertInputAndOutputForFusedOpNode
(
const
std
::
vector
<
ir
::
Node
*>
&
opt_ops
,
ir
::
Graph
*
graph
,
ir
::
Node
*
opt_node
)
const
;
private:
...
...
@@ -49,7 +50,7 @@ class FuseOptimizerOpPass : public ir::Pass {
virtual
const
std
::
vector
<
std
::
string
>
GetAuxiliaryVarNames
()
const
=
0
;
virtual
void
FuseOptimizerOps
(
virtual
ir
::
Node
*
FuseOptimizerOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
adam_ops
,
ir
::
Graph
*
graph
)
const
=
0
;
...
...
@@ -91,6 +92,9 @@ class FuseOptimizerOpPass : public ir::Pass {
*
aux_var_set
)
const
;
bool
IsLoDTensorType
(
const
proto
::
VarType
::
Type
&
type
)
const
;
bool
HasVarDepsBetweenOps
(
const
std
::
vector
<
Node
*>
&
topo_nodes
,
const
std
::
vector
<
Node
*>
&
opt_nodes
)
const
;
};
}
// namespace ir
...
...
paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc
浏览文件 @
17d62ab2
...
...
@@ -31,7 +31,7 @@ class FuseSgdOpPass : public FuseOptimizerOpPass {
}
// Fuse Sgd Ops
virtual
void
FuseOptimizerOps
(
virtual
ir
::
Node
*
FuseOptimizerOps
(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
vars_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
,
const
std
::
vector
<
ir
::
Node
*>
&
sgd_ops
,
ir
::
Graph
*
graph
)
const
{
...
...
@@ -56,9 +56,7 @@ class FuseSgdOpPass : public FuseOptimizerOpPass {
// NOTE: multi_devices_pass requires that every op should have a role.
Sgd_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
auto
sgd_node
=
graph
->
CreateOpNode
(
&
Sgd_desc
);
InserInputAndOutputForOptOps
(
sgd_ops
,
sgd_node
);
return
graph
->
CreateOpNode
(
&
Sgd_desc
);
}
};
}
// namespace ir
...
...
python/paddle/fluid/tests/unittests/dist_save_load.py
浏览文件 @
17d62ab2
...
...
@@ -124,7 +124,6 @@ class TestDistSaveLoad2x2(TestDistSimnetBow2x2):
strategy
=
fluid
.
ExecutionStrategy
()
strategy
.
num_threads
=
1
strategy
.
allow_op_delay
=
False
build_stra
=
fluid
.
BuildStrategy
()
...
...
python/paddle/fluid/tests/unittests/parallel_executor_test_base.py
浏览文件 @
17d62ab2
...
...
@@ -36,10 +36,8 @@ class TestParallelExecutorBase(unittest.TestCase):
memory_opt
=
False
,
iter
=
50
,
batch_size
=
None
,
allow_op_delay
=
False
,
feed_dict
=
None
,
get_data_from_feeder
=
None
,
seed
=
None
,
use_parallel_executor
=
True
,
use_reduce
=
False
,
use_ir_memory_optimize
=
True
,
...
...
@@ -57,51 +55,23 @@ class TestParallelExecutorBase(unittest.TestCase):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
startup
.
random_seed
=
1
# Fix random seed
startup
.
random_seed
=
1
main
.
random_seed
=
1
with
fluid
.
program_guard
(
main
,
startup
):
if
seed
is
not
None
:
startup
.
random_seed
=
seed
main
.
random_seed
=
seed
loss
=
method
(
use_feed
=
feed_dict
is
not
None
)
# NOTE(zjl): memory_optimize/inplace pass would not require
# that loss.persistable = True
loss
.
persistable
=
memory_opt
if
optimizer
:
optimizer
().
minimize
(
loss
)
if
memory_opt
:
fluid
.
memory_optimize
(
main
)
if
get_data_from_feeder
is
not
None
:
assert
feed_dict
is
None
feed_dict
=
get_data_from_feeder
()
feed_dict
,
loss
=
cls
.
build_model
(
feed_dict
,
get_data_from_feeder
,
main
,
memory_opt
,
method
,
optimizer
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup
)
exec_strategy
=
fluid
.
ExecutionStrategy
()
exec_strategy
.
allow_op_delay
=
allow_op_delay
if
use_fast_executor
:
exec_strategy
.
use_experimental_executor
=
True
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
\
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
fuse_relu_depthwise_conv
=
fuse_relu_depthwise_conv
build_strategy
.
fuse_all_optimizer_ops
=
fuse_all_optimizer_ops
build_strategy
.
fuse_all_reduce_ops
=
fuse_all_reduce_ops
build_strategy
.
memory_optimize
=
use_ir_memory_optimize
build_strategy
.
enable_inplace
=
enable_inplace
build_strategy
.
enable_sequential_execution
=
enable_sequential_execution
build_strategy
,
exec_strategy
=
cls
.
set_strategy
(
enable_inplace
,
enable_sequential_execution
,
fuse_all_optimizer_ops
,
fuse_all_reduce_ops
,
fuse_elewise_add_act_ops
,
fuse_relu_depthwise_conv
,
use_fast_executor
,
use_ir_memory_optimize
,
use_reduce
,
use_cuda
)
if
use_cuda
and
core
.
is_compiled_with_cuda
():
build_strategy
.
remove_unnecessary_lock
=
True
if
use_parallel_executor
:
binary
=
compiler
.
CompiledProgram
(
main
).
with_data_parallel
(
loss_name
=
loss
.
name
,
...
...
@@ -114,13 +84,12 @@ class TestParallelExecutorBase(unittest.TestCase):
batch_size
*=
fluid
.
core
.
get_cuda_device_count
(
)
if
use_cuda
else
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
begin
=
time
.
time
()
first_loss
,
=
run_executor
(
exe
=
exe
,
binary
=
binary
,
feed
=
feed_dict
,
fetch_list
=
[
loss
.
name
])
for
i
in
range
(
iter
):
for
_
in
range
(
iter
):
run_executor
(
exe
=
exe
,
binary
=
binary
,
feed
=
feed_dict
,
fetch_list
=
[])
last_loss
,
=
run_executor
(
exe
=
exe
,
binary
=
binary
,
feed
=
feed_dict
,
fetch_list
=
[
loss
.
name
])
end
=
time
.
time
()
...
...
@@ -138,3 +107,85 @@ class TestParallelExecutorBase(unittest.TestCase):
print
(
first_loss
,
last_loss
)
# self.assertGreater(first_loss[0], last_loss[0])
return
first_loss
,
last_loss
@
classmethod
def
check_pass_conflict
(
cls
,
method
,
use_cuda
=
True
,
memory_opt
=
False
,
feed_dict
=
None
,
get_data_from_feeder
=
None
,
use_reduce
=
False
,
use_ir_memory_optimize
=
True
,
enable_inplace
=
True
,
fuse_elewise_add_act_ops
=
False
,
fuse_all_optimizer_ops
=
False
,
fuse_all_reduce_ops
=
False
,
fuse_relu_depthwise_conv
=
False
,
optimizer
=
fluid
.
optimizer
.
Adam
,
use_fast_executor
=
True
,
enable_sequential_execution
=
False
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main
,
startup
):
feed_dict
,
loss
=
cls
.
build_model
(
feed_dict
,
get_data_from_feeder
,
main
,
memory_opt
,
method
,
optimizer
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup
)
build_strategy
,
exec_strategy
=
cls
.
set_strategy
(
enable_inplace
,
enable_sequential_execution
,
fuse_all_optimizer_ops
,
fuse_all_reduce_ops
,
fuse_elewise_add_act_ops
,
fuse_relu_depthwise_conv
,
use_fast_executor
,
use_ir_memory_optimize
,
use_reduce
,
use_cuda
)
binary
=
compiler
.
CompiledProgram
(
main
).
with_data_parallel
(
loss_name
=
loss
.
name
,
build_strategy
=
build_strategy
,
exec_strategy
=
exec_strategy
)
exe
.
run
(
binary
,
feed
=
feed_dict
,
fetch_list
=
[
loss
.
name
])
@
classmethod
def
set_strategy
(
cls
,
enable_inplace
,
enable_sequential_execution
,
fuse_all_optimizer_ops
,
fuse_all_reduce_ops
,
fuse_elewise_add_act_ops
,
fuse_relu_depthwise_conv
,
use_fast_executor
,
use_ir_memory_optimize
,
use_reduce
,
use_cuda
):
exec_strategy
=
fluid
.
ExecutionStrategy
()
if
use_fast_executor
:
exec_strategy
.
use_experimental_executor
=
True
build_strategy
=
fluid
.
BuildStrategy
()
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
\
if
use_reduce
else
fluid
.
BuildStrategy
.
ReduceStrategy
.
AllReduce
build_strategy
.
fuse_elewise_add_act_ops
=
fuse_elewise_add_act_ops
build_strategy
.
fuse_relu_depthwise_conv
=
fuse_relu_depthwise_conv
build_strategy
.
fuse_all_optimizer_ops
=
fuse_all_optimizer_ops
build_strategy
.
fuse_all_reduce_ops
=
fuse_all_reduce_ops
build_strategy
.
memory_optimize
=
use_ir_memory_optimize
build_strategy
.
enable_inplace
=
enable_inplace
build_strategy
.
enable_sequential_execution
=
enable_sequential_execution
if
use_cuda
and
core
.
is_compiled_with_cuda
():
build_strategy
.
remove_unnecessary_lock
=
True
return
build_strategy
,
exec_strategy
@
classmethod
def
build_model
(
cls
,
feed_dict
,
get_data_from_feeder
,
main
,
memory_opt
,
method
,
optimizer
):
loss
=
method
(
use_feed
=
feed_dict
is
not
None
)
# NOTE(zjl): memory_optimize/inplace pass would not require
# that loss.persistable = True
loss
.
persistable
=
memory_opt
if
optimizer
:
optimizer
().
minimize
(
loss
)
if
memory_opt
:
fluid
.
memory_optimize
(
main
)
if
get_data_from_feeder
is
not
None
:
assert
feed_dict
is
None
feed_dict
=
get_data_from_feeder
()
return
feed_dict
,
loss
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
17d62ab2
...
...
@@ -165,7 +165,6 @@ class TestDistRunnerBase(object):
exec_strategy
=
fluid
.
ExecutionStrategy
()
exec_strategy
.
num_threads
=
1
exec_strategy
.
allow_op_delay
=
False
build_stra
=
fluid
.
BuildStrategy
()
# FIXME force disable enable_inplace and memory_optimize
...
...
python/paddle/fluid/tests/unittests/test_fuse_optimizer_pass.py
浏览文件 @
17d62ab2
...
...
@@ -74,12 +74,6 @@ class TestFuseAdamOps(TestFuseOptimizationOps):
def
optimizer
(
self
,
learning_rate
=
1e-4
):
return
fluid
.
optimizer
.
Adam
(
learning_rate
=
learning_rate
)
def
test_simple_fc_with_fuse_op
(
self
):
self
.
_decorate_compare_fused_optimizer_ops
(
simple_fc_net
,
True
,
optimizer
=
self
.
optimizer
)
self
.
_decorate_compare_fused_optimizer_ops
(
simple_fc_net
,
False
,
optimizer
=
self
.
optimizer
)
def
test_batchnorm_fc_with_fuse_op
(
self
):
self
.
_decorate_compare_fused_optimizer_ops
(
fc_with_batchnorm
,
True
,
optimizer
=
self
.
optimizer
)
...
...
@@ -142,5 +136,48 @@ class TestSpareFuseMomentumOps(TestSpareFuseAdamOps):
learning_rate
=
learning_rate
,
momentum
=
0.1
)
class
TestPassConflictBase
(
TestFuseAdamOps
):
def
_compare_fused_optimizer_ops
(
self
,
model
,
use_cuda
,
feed_dict
=
None
,
get_data_from_feeder
=
None
,
optimizer
=
fluid
.
optimizer
.
Adam
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
self
.
check_pass_conflict
(
model
,
feed_dict
=
feed_dict
,
get_data_from_feeder
=
get_data_from_feeder
,
use_cuda
=
use_cuda
,
fuse_all_optimizer_ops
=
True
,
memory_opt
=
False
,
# avoid the gradient's name being changed on the Python side.
optimizer
=
optimizer
,
enable_sequential_execution
=
True
)
class
TestFuseAdamOpsPassConflict
(
TestPassConflictBase
):
def
optimizer
(
self
,
learning_rate
=
1e-4
):
return
fluid
.
optimizer
.
Adam
(
learning_rate
=
learning_rate
)
def
test_batchnorm_fc_with_fuse_op
(
self
):
self
.
_decorate_compare_fused_optimizer_ops
(
fc_with_batchnorm
,
True
,
optimizer
=
self
.
optimizer
)
self
.
_decorate_compare_fused_optimizer_ops
(
fc_with_batchnorm
,
False
,
optimizer
=
self
.
optimizer
)
class
TestFuseSGDOpsPassConflict
(
TestFuseAdamOpsPassConflict
):
def
optimizer
(
self
,
learning_rate
=
1e-3
):
return
fluid
.
optimizer
.
SGD
(
learning_rate
=
learning_rate
)
class
TestFuseMomentumOpsPassConflict
(
TestFuseAdamOpsPassConflict
):
def
optimizer
(
self
,
learning_rate
=
1e-3
):
return
fluid
.
optimizer
.
Momentum
(
learning_rate
=
learning_rate
,
momentum
=
0.1
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
浏览文件 @
17d62ab2
...
...
@@ -135,14 +135,12 @@ class TestMNIST(TestParallelExecutorBase):
single_first_loss
,
single_last_loss
=
self
.
check_network_convergence
(
method
=
simple_fc_net
,
seed
=
1
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
use_parallel_executor
=
False
)
parallel_first_loss
,
parallel_last_loss
=
self
.
check_network_convergence
(
method
=
simple_fc_net
,
seed
=
1
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_pg.py
浏览文件 @
17d62ab2
...
...
@@ -54,14 +54,12 @@ class TestMNIST(TestParallelExecutorBase):
img
,
label
=
init_data
()
single_first_loss
,
single_last_loss
=
self
.
check_network_convergence
(
method
=
simple_fc_net
,
seed
=
1
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
use_parallel_executor
=
False
)
parallel_first_loss
,
parallel_last_loss
=
self
.
check_network_convergence
(
method
=
simple_fc_net
,
seed
=
1
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录