Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
e9409665
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e9409665
编写于
4月 12, 2019
作者:
C
chengduo
提交者:
GitHub
4月 12, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine Fuse Optimize Ops (#16810)
* fix bug of fuse optimize ops
上级
1f2afccf
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
160 addition
and
63 deletion
+160
-63
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+0
-2
paddle/fluid/framework/details/fuse_adam_op_pass.cc
paddle/fluid/framework/details/fuse_adam_op_pass.cc
+5
-5
paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
+127
-48
paddle/fluid/framework/details/fuse_optimizer_op_pass.h
paddle/fluid/framework/details/fuse_optimizer_op_pass.h
+23
-3
paddle/fluid/framework/details/fuse_sgd_op_pass.cc
paddle/fluid/framework/details/fuse_sgd_op_pass.cc
+5
-5
未找到文件。
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
e9409665
...
@@ -101,8 +101,6 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
...
@@ -101,8 +101,6 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
"mode."
;
"mode."
;
strategy_
.
fuse_all_optimizer_ops_
=
false
;
strategy_
.
fuse_all_optimizer_ops_
=
false
;
}
else
{
}
else
{
VLOG
(
10
)
<<
"Add alloc_continuous_space_for_grad_pass"
;
AppendPass
(
"alloc_continuous_space_for_grad_pass"
);
// NOTE: fuse_all_xx_ops will count the number of xx operator first,
// NOTE: fuse_all_xx_ops will count the number of xx operator first,
// if the number is zero, fuse_all_reduce_ops will do nothing.
// if the number is zero, fuse_all_reduce_ops will do nothing.
// Currently, only one type of optimization algorithm can be fused.
// Currently, only one type of optimization algorithm can be fused.
...
...
paddle/fluid/framework/details/fuse_adam_op_pass.cc
浏览文件 @
e9409665
...
@@ -24,7 +24,7 @@ namespace details {
...
@@ -24,7 +24,7 @@ namespace details {
const
std
::
string
FuseAdamOpPass
::
GetOpType
()
const
{
return
"adam"
;
}
const
std
::
string
FuseAdamOpPass
::
GetOpType
()
const
{
return
"adam"
;
}
const
std
::
vector
<
std
::
string
>
FuseAdamOpPass
::
GetAuxiliaryVarNames
()
const
{
const
std
::
vector
<
std
::
string
>
FuseAdamOpPass
::
GetAuxiliaryVarNames
()
const
{
return
{
"
Param"
,
"
Moment1"
,
"Moment2"
,
"Beta1Pow"
,
"Beta2Pow"
};
return
{
"Moment1"
,
"Moment2"
,
"Beta1Pow"
,
"Beta2Pow"
};
}
}
void
FuseAdamOpPass
::
FuseOptimizerOps
(
void
FuseAdamOpPass
::
FuseOptimizerOps
(
...
@@ -77,16 +77,16 @@ void FuseAdamOpPass::FuseAdamOps(
...
@@ -77,16 +77,16 @@ void FuseAdamOpPass::FuseAdamOps(
VLOG
(
10
)
<<
"Insert adam to graph "
;
VLOG
(
10
)
<<
"Insert adam to graph "
;
OpDesc
adam_desc
(
adam_ops
[
0
]
->
Op
()
->
Block
());
OpDesc
adam_desc
(
adam_ops
[
0
]
->
Op
()
->
Block
());
adam_desc
.
SetType
(
"adam"
);
adam_desc
.
SetType
(
"adam"
);
adam_desc
.
SetInput
(
"Param"
,
{
fused_vars_name
.
at
(
"Param"
)});
adam_desc
.
SetInput
(
kParam
,
{
fused_vars_name
.
at
(
kParam
)});
adam_desc
.
SetInput
(
"Grad"
,
{
fused_vars_name
.
at
(
"Grad"
)});
adam_desc
.
SetInput
(
kGrad
,
{
fused_vars_name
.
at
(
kGrad
)});
adam_desc
.
SetInput
(
"Moment1"
,
{
fused_vars_name
.
at
(
"Moment1"
)});
adam_desc
.
SetInput
(
"Moment1"
,
{
fused_vars_name
.
at
(
"Moment1"
)});
adam_desc
.
SetInput
(
"Moment2"
,
{
fused_vars_name
.
at
(
"Moment2"
)});
adam_desc
.
SetInput
(
"Moment2"
,
{
fused_vars_name
.
at
(
"Moment2"
)});
// TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
// TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
adam_desc
.
SetInput
(
"LearningRate"
,
adam_ops
[
0
]
->
Op
()
->
Input
(
"LearningRate"
));
adam_desc
.
SetInput
(
kLearningRate
,
adam_ops
[
0
]
->
Op
()
->
Input
(
kLearningRate
));
adam_desc
.
SetInput
(
"Beta1Pow"
,
adam_ops
[
0
]
->
Op
()
->
Input
(
"Beta1Pow"
));
adam_desc
.
SetInput
(
"Beta1Pow"
,
adam_ops
[
0
]
->
Op
()
->
Input
(
"Beta1Pow"
));
adam_desc
.
SetInput
(
"Beta2Pow"
,
adam_ops
[
0
]
->
Op
()
->
Input
(
"Beta2Pow"
));
adam_desc
.
SetInput
(
"Beta2Pow"
,
adam_ops
[
0
]
->
Op
()
->
Input
(
"Beta2Pow"
));
adam_desc
.
SetOutput
(
"ParamOut"
,
{
fused_vars_name
.
at
(
"Param"
)});
adam_desc
.
SetOutput
(
"ParamOut"
,
{
fused_vars_name
.
at
(
kParam
)});
adam_desc
.
SetOutput
(
"Moment1Out"
,
{
fused_vars_name
.
at
(
"Moment1"
)});
adam_desc
.
SetOutput
(
"Moment1Out"
,
{
fused_vars_name
.
at
(
"Moment1"
)});
adam_desc
.
SetOutput
(
"Moment2Out"
,
{
fused_vars_name
.
at
(
"Moment2"
)});
adam_desc
.
SetOutput
(
"Moment2Out"
,
{
fused_vars_name
.
at
(
"Moment2"
)});
adam_desc
.
SetAttr
(
"beta1"
,
beta1
);
adam_desc
.
SetAttr
(
"beta1"
,
beta1
);
...
...
paddle/fluid/framework/details/fuse_optimizer_op_pass.cc
浏览文件 @
e9409665
...
@@ -29,7 +29,9 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
...
@@ -29,7 +29,9 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
auto
&
local_scopes
=
Get
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
);
auto
&
local_scopes
=
Get
<
const
std
::
vector
<
Scope
*>>
(
kLocalScopes
);
const
std
::
string
fuse_op_type
=
GetOpType
();
const
std
::
string
fuse_op_type
=
GetOpType
();
const
std
::
vector
<
std
::
string
>
aux_var_names
=
GetAuxiliaryVarNames
();
std
::
vector
<
std
::
string
>
aux_var_names
=
GetAuxiliaryVarNames
();
aux_var_names
.
emplace_back
(
kParam
);
aux_var_names
.
emplace_back
(
kGrad
);
// Step 1: Get the specified op and auxiliary variables.
// Step 1: Get the specified op and auxiliary variables.
std
::
vector
<
ir
::
Node
*>
topo_nodes
=
ir
::
TopologySortOperations
(
result
);
std
::
vector
<
ir
::
Node
*>
topo_nodes
=
ir
::
TopologySortOperations
(
result
);
...
@@ -61,7 +63,7 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
...
@@ -61,7 +63,7 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
result
.
Set
(
kFusedVars
,
new
FusedVars
);
result
.
Set
(
kFusedVars
,
new
FusedVars
);
}
}
std
::
unordered_map
<
std
::
string
,
std
::
string
>
fused_vars_name
;
std
::
unordered_map
<
std
::
string
,
std
::
string
>
fused_vars_name
;
fused_vars_name
.
reserve
(
aux_var_names
.
size
()
+
1
);
fused_vars_name
.
reserve
(
aux_var_names
.
size
());
auto
&
fused_var_set
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
auto
&
fused_var_set
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
const
std
::
string
prefix
(
kFusedVarNamePrefix
);
const
std
::
string
prefix
(
kFusedVarNamePrefix
);
// NOTE: the fused_var_name should be unique.
// NOTE: the fused_var_name should be unique.
...
@@ -75,39 +77,103 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
...
@@ -75,39 +77,103 @@ void FuseOptimizerOpPass::ApplyImpl(ir::Graph *graph) const {
}
}
// Step 3: Get the fused Gradient's name
// Step 3: Get the fused Gradient's name
auto
&
params_grads
=
result
.
Get
<
ParamsAndGrads
>
(
kParamsAndGrads
);
bool
grad_fused
=
false
;
if
(
!
result
.
Has
(
kFusedGrads
))
{
if
(
result
.
Has
(
kParamsAndGrads
))
{
PADDLE_THROW
(
auto
&
params_grads
=
result
.
Get
<
ParamsAndGrads
>
(
kParamsAndGrads
);
"The alloc_continuous_space_for_grad_pass should be called before this "
PADDLE_ENFORCE_EQ
(
"pass."
);
params_grads
.
size
(),
aux_var_set
.
at
(
kGrad
).
size
(),
}
"The number of gradients and optimizer ops is not equal."
);
auto
&
fused_grad
=
result
.
Get
<
FusedGrads
>
(
kFusedGrads
);
std
::
unordered_set
<
std
::
string
>
opt_grad_set
(
aux_var_set
.
at
(
kGrad
).
begin
(),
auto
&
fused_vars
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
aux_var_set
.
at
(
kGrad
).
end
());
auto
iter
=
std
::
find
(
fused_vars
.
begin
(),
fused_vars
.
end
(),
fused_grad
);
size_t
same_grad_num
=
0
;
PADDLE_ENFORCE
(
iter
!=
fused_vars
.
end
(),
"Not find the fused_grad."
);
for
(
auto
&
p_g
:
params_grads
)
{
fused_vars_name
.
emplace
(
"Grad"
,
fused_grad
);
if
(
opt_grad_set
.
count
(
p_g
.
second
))
{
++
same_grad_num
;
// Step 4: Sort the parameters and auxiliary variables according
}
// to parameters' name to make variables' name correspond correctly.
}
PADDLE_ENFORCE
(
result
.
Has
(
kParamsAndGrads
),
"Does't find kParamsAndGrads."
);
PADDLE_ENFORCE_EQ
(
params_grads
.
size
(),
aux_var_set
.
begin
()
->
second
.
size
(),
// NOTE(zcd): the gradient of kParamsAndGrads may be different with the
"The size of params_grads and aux_var_set are not equal."
);
// kGrad.
SortParametersAndAuxVars
(
params_grads
,
&
aux_var_set
,
&
opt_ops
);
if
(
same_grad_num
==
aux_var_set
.
at
(
kGrad
).
size
())
{
if
(
!
result
.
Has
(
kFusedGrads
))
{
// Step 5: Alloc continuous space for Parameters and AuxiliaryVar(e.g.
PADDLE_THROW
(
"The alloc_continuous_space_for_grad_pass should be called before "
"this pass."
);
}
auto
&
fused_grad
=
result
.
Get
<
FusedGrads
>
(
kFusedGrads
);
auto
&
fused_vars
=
result
.
Get
<
FusedVars
>
(
kFusedVars
);
auto
iter
=
std
::
find
(
fused_vars
.
begin
(),
fused_vars
.
end
(),
fused_grad
);
PADDLE_ENFORCE
(
iter
!=
fused_vars
.
end
(),
"Not find the fused_grad."
);
fused_vars_name
[
kGrad
]
=
fused_grad
;
// Sort the parameters and auxiliary variables according
// to parameters' name to make variables' name correspond correctly.
SortParametersAndAuxVars
(
params_grads
,
&
aux_var_set
,
&
opt_ops
);
grad_fused
=
true
;
}
}
// Step 4: Alloc continuous space for Parameters and AuxiliaryVar(e.g.
// Moment1, Moment2, Beta1Pow, Beta2Pow) of all the optimizer ops separately.
// Moment1, Moment2, Beta1Pow, Beta2Pow) of all the optimizer ops separately.
aux_var_names
.
pop_back
();
if
(
!
grad_fused
)
{
InitFusedGradsAndAllocSpaceForGrads
(
places
,
local_scopes
,
aux_var_set
.
at
(
kParam
),
aux_var_set
.
at
(
kGrad
),
fused_vars_name
.
at
(
kGrad
),
&
result
);
}
InitFusedVarsAndAllocSpaceForVars
(
places
,
local_scopes
,
aux_var_names
,
InitFusedVarsAndAllocSpaceForVars
(
places
,
local_scopes
,
aux_var_names
,
aux_var_set
,
fused_vars_name
);
aux_var_set
,
fused_vars_name
);
// Step
6
: Fuse optimizer Ops and Scale Ops
// Step
5
: Fuse optimizer Ops and Scale Ops
FuseOptimizerOps
(
aux_var_set
,
fused_vars_name
,
opt_ops
,
&
result
);
FuseOptimizerOps
(
aux_var_set
,
fused_vars_name
,
opt_ops
,
&
result
);
// Step
7
: Remove optimizer Ops
// Step
6
: Remove optimizer Ops
for
(
auto
&
opt_op
:
opt_ops
)
{
for
(
auto
&
opt_op
:
opt_ops
)
{
graph
->
RemoveNode
(
opt_op
);
graph
->
RemoveNode
(
opt_op
);
}
}
}
}
void
FuseOptimizerOpPass
::
InitFusedGradsAndAllocSpaceForGrads
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
std
::
string
>
&
params
,
const
std
::
vector
<
std
::
string
>
&
grads
,
const
std
::
string
&
fused_grad_name
,
ir
::
Graph
*
result
)
const
{
// Get Var Nodes
std
::
unordered_map
<
std
::
string
,
ir
::
Node
*>
vars
;
for
(
ir
::
Node
*
node
:
result
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
node
->
Var
())
{
// Note: The graph may have the same name node. For example, parameter
// is the input of operator and it also is the output of optimizer;
vars
.
emplace
(
node
->
Var
()
->
Name
(),
node
);
}
}
// Init Grads
for
(
auto
it
=
local_scopes
.
rbegin
();
it
!=
local_scopes
.
rend
();
++
it
)
{
auto
&
scope
=
*
it
;
VLOG
(
10
)
<<
"Init "
<<
fused_grad_name
;
PADDLE_ENFORCE
(
scope
->
FindVar
(
fused_grad_name
)
==
nullptr
,
"%s has existed in scope."
,
fused_grad_name
);
scope
->
Var
(
fused_grad_name
)
->
GetMutable
<
LoDTensor
>
();
for
(
auto
&
grad_var_name
:
grads
)
{
auto
iter
=
vars
.
find
(
grad_var_name
);
PADDLE_ENFORCE
(
iter
!=
vars
.
end
());
PADDLE_ENFORCE_NOT_NULL
(
iter
->
second
->
Var
());
PADDLE_ENFORCE_EQ
(
iter
->
second
->
Var
()
->
GetType
(),
proto
::
VarType
::
LOD_TENSOR
);
scope
->
Var
(
grad_var_name
)
->
GetMutable
<
LoDTensor
>
();
}
}
// Define Ops
ProgramDesc
program_desc
;
auto
*
global_block
=
program_desc
.
MutableBlock
(
0
);
AppendAllocContinuousSpace
(
params
,
grads
,
fused_grad_name
,
global_block
,
false
,
false
);
// Run Ops
RunInitOps
(
places
,
local_scopes
,
*
global_block
);
}
void
FuseOptimizerOpPass
::
InitFusedVarsAndAllocSpaceForVars
(
void
FuseOptimizerOpPass
::
InitFusedVarsAndAllocSpaceForVars
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
...
@@ -115,37 +181,49 @@ void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
...
@@ -115,37 +181,49 @@ void FuseOptimizerOpPass::InitFusedVarsAndAllocSpaceForVars(
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
const
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
&
aux_var_set
,
&
aux_var_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
)
const
{
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
)
const
{
VLOG
(
10
)
<<
"Init FusedVars."
;
// Init Vars
// Alloc parameters and auxiliary vars in the respective scope.
for
(
auto
&
var_name
:
aux_var_names
)
{
size_t
idx
=
local_scopes
.
size
();
auto
&
fused_var_name
=
fused_vars_name
.
at
(
var_name
);
for
(
auto
iter
=
local_scopes
.
rbegin
();
iter
!=
local_scopes
.
rend
();
InitVars
(
local_scopes
,
fused_var_name
);
++
iter
,
--
idx
)
{
auto
&
scope
=
*
iter
;
for
(
auto
&
var_name
:
aux_var_names
)
{
auto
fused_var_name
=
fused_vars_name
.
at
(
var_name
);
VLOG
(
10
)
<<
"Init "
<<
fused_var_name
;
PADDLE_ENFORCE
(
scope
->
FindVar
(
fused_var_name
)
==
nullptr
,
"%s has exist in scope[%d]"
,
fused_var_name
,
idx
);
scope
->
Var
(
fused_var_name
)
->
GetMutable
<
LoDTensor
>
();
}
}
}
// Define Ops
ProgramDesc
program_desc
;
ProgramDesc
program_desc
;
auto
*
global_block
=
program_desc
.
MutableBlock
(
0
);
auto
*
global_block
=
program_desc
.
MutableBlock
(
0
);
for
(
auto
&
var_name
:
aux_var_names
)
{
for
(
auto
&
var_name
:
aux_var_names
)
{
AppendAllocContinuousSpace
(
aux_var_set
.
at
(
var_name
),
AppendAllocContinuousSpace
(
fused_vars_name
.
at
(
var_name
),
true
,
aux_var_set
.
at
(
var_name
),
aux_var_set
.
at
(
var_name
)
,
global_block
);
fused_vars_name
.
at
(
var_name
),
global_block
,
true
);
}
}
// Run Ops
RunInitOps
(
places
,
local_scopes
,
*
global_block
);
}
void
FuseOptimizerOpPass
::
RunInitOps
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
BlockDesc
&
global_block
)
const
{
for
(
size_t
i
=
0
;
i
<
local_scopes
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
local_scopes
.
size
();
++
i
)
{
for
(
auto
&
op_desc
:
global_block
->
AllOps
())
{
for
(
auto
&
op_desc
:
global_block
.
AllOps
())
{
auto
op
=
OpRegistry
::
CreateOp
(
*
op_desc
);
auto
op
=
OpRegistry
::
CreateOp
(
*
op_desc
);
op
->
Run
(
*
local_scopes
[
i
],
places
[
i
]);
op
->
Run
(
*
local_scopes
[
i
],
places
[
i
]);
}
}
}
}
}
}
void
FuseOptimizerOpPass
::
InitVars
(
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
string
&
fused_var_name
)
const
{
VLOG
(
10
)
<<
"Init FusedVars."
;
// Alloc parameters and auxiliary vars in the respective scope.
size_t
idx
=
local_scopes
.
size
();
for
(
auto
iter
=
local_scopes
.
rbegin
();
iter
!=
local_scopes
.
rend
();
++
iter
,
--
idx
)
{
auto
&
scope
=
*
iter
;
VLOG
(
10
)
<<
"Init "
<<
fused_var_name
;
PADDLE_ENFORCE
(
scope
->
FindVar
(
fused_var_name
)
==
nullptr
,
"%s has exist in scope[%d]"
,
fused_var_name
,
idx
);
scope
->
Var
(
fused_var_name
)
->
GetMutable
<
LoDTensor
>
();
}
}
void
FuseOptimizerOpPass
::
SortParametersAndAuxVars
(
void
FuseOptimizerOpPass
::
SortParametersAndAuxVars
(
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
params_grads
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>>
&
params_grads
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_vars_set
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_vars_set
,
...
@@ -203,15 +281,16 @@ void FuseOptimizerOpPass::GetSpecifiedOpsAndVars(
...
@@ -203,15 +281,16 @@ void FuseOptimizerOpPass::GetSpecifiedOpsAndVars(
}
}
void
FuseOptimizerOpPass
::
AppendAllocContinuousSpace
(
void
FuseOptimizerOpPass
::
AppendAllocContinuousSpace
(
const
std
::
vector
<
std
::
string
>
&
args
,
const
std
::
string
&
out_arg
,
const
std
::
vector
<
std
::
string
>
&
in_args
,
bool
copy_data
,
BlockDesc
*
global_block
)
const
{
const
std
::
vector
<
std
::
string
>
&
out_args
,
const
std
::
string
&
fused_out_arg
,
BlockDesc
*
global_block
,
bool
copy_data
,
bool
check_name
)
const
{
auto
op_desc
=
global_block
->
AppendOp
();
auto
op_desc
=
global_block
->
AppendOp
();
op_desc
->
SetType
(
"alloc_continuous_space"
);
op_desc
->
SetType
(
"alloc_continuous_space"
);
op_desc
->
SetInput
(
"Input"
,
args
);
op_desc
->
SetInput
(
"Input"
,
in_
args
);
op_desc
->
SetOutput
(
"Output"
,
args
);
op_desc
->
SetOutput
(
"Output"
,
out_
args
);
op_desc
->
SetOutput
(
"FusedOutput"
,
{
out_arg
});
op_desc
->
SetOutput
(
"FusedOutput"
,
{
fused_
out_arg
});
op_desc
->
SetAttr
(
"copy_data"
,
copy_data
);
op_desc
->
SetAttr
(
"copy_data"
,
copy_data
);
op_desc
->
SetAttr
(
"check_name"
,
tru
e
);
op_desc
->
SetAttr
(
"check_name"
,
check_nam
e
);
}
}
void
FuseOptimizerOpPass
::
InserInputAndOutputForOptOps
(
void
FuseOptimizerOpPass
::
InserInputAndOutputForOptOps
(
...
...
paddle/fluid/framework/details/fuse_optimizer_op_pass.h
浏览文件 @
e9409665
...
@@ -27,6 +27,10 @@ namespace paddle {
...
@@ -27,6 +27,10 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
details
{
namespace
details
{
constexpr
char
kGrad
[]
=
"Grad"
;
constexpr
char
kParam
[]
=
"Param"
;
constexpr
char
kLearningRate
[]
=
"LearningRate"
;
class
FuseOptimizerOpPass
:
public
ir
::
Pass
{
class
FuseOptimizerOpPass
:
public
ir
::
Pass
{
protected:
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
...
@@ -56,9 +60,18 @@ class FuseOptimizerOpPass : public ir::Pass {
...
@@ -56,9 +60,18 @@ class FuseOptimizerOpPass : public ir::Pass {
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_args_name
)
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
*
aux_args_name
)
const
;
const
;
void
AppendAllocContinuousSpace
(
const
std
::
vector
<
std
::
string
>
&
args
,
void
AppendAllocContinuousSpace
(
const
std
::
vector
<
std
::
string
>
&
in_args
,
const
std
::
string
&
out_arg
,
bool
copy_data
,
const
std
::
vector
<
std
::
string
>
&
out_args
,
BlockDesc
*
global_block
)
const
;
const
std
::
string
&
fused_out_arg
,
BlockDesc
*
global_block
,
bool
copy_data
,
bool
check_name
=
true
)
const
;
void
InitFusedGradsAndAllocSpaceForGrads
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
std
::
string
>
&
params
,
const
std
::
vector
<
std
::
string
>
&
grads
,
const
std
::
string
&
fused_grad_name
,
ir
::
Graph
*
result
)
const
;
void
InitFusedVarsAndAllocSpaceForVars
(
void
InitFusedVarsAndAllocSpaceForVars
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
...
@@ -68,6 +81,13 @@ class FuseOptimizerOpPass : public ir::Pass {
...
@@ -68,6 +81,13 @@ class FuseOptimizerOpPass : public ir::Pass {
&
aux_var_set
,
&
aux_var_set
,
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
)
const
std
::
unordered_map
<
std
::
string
,
std
::
string
>
&
fused_vars_name
)
const
;
const
;
void
RunInitOps
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
BlockDesc
&
global_block
)
const
;
void
InitVars
(
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
string
&
fused_var_name
)
const
;
};
};
}
// namespace details
}
// namespace details
...
...
paddle/fluid/framework/details/fuse_sgd_op_pass.cc
浏览文件 @
e9409665
...
@@ -24,7 +24,7 @@ namespace details {
...
@@ -24,7 +24,7 @@ namespace details {
const
std
::
string
FuseSgdOpPass
::
GetOpType
()
const
{
return
"sgd"
;
}
const
std
::
string
FuseSgdOpPass
::
GetOpType
()
const
{
return
"sgd"
;
}
const
std
::
vector
<
std
::
string
>
FuseSgdOpPass
::
GetAuxiliaryVarNames
()
const
{
const
std
::
vector
<
std
::
string
>
FuseSgdOpPass
::
GetAuxiliaryVarNames
()
const
{
return
{
"Param"
};
return
{};
}
}
void
FuseSgdOpPass
::
FuseOptimizerOps
(
void
FuseSgdOpPass
::
FuseOptimizerOps
(
...
@@ -50,12 +50,12 @@ void FuseSgdOpPass::FuseSgdOps(
...
@@ -50,12 +50,12 @@ void FuseSgdOpPass::FuseSgdOps(
// Add fused scale
// Add fused scale
OpDesc
Sgd_desc
(
sgd_ops
[
0
]
->
Op
()
->
Block
());
OpDesc
Sgd_desc
(
sgd_ops
[
0
]
->
Op
()
->
Block
());
Sgd_desc
.
SetType
(
"sgd"
);
Sgd_desc
.
SetType
(
"sgd"
);
Sgd_desc
.
SetInput
(
"Param"
,
{
fused_vars_name
.
at
(
"Param"
)});
Sgd_desc
.
SetInput
(
kParam
,
{
fused_vars_name
.
at
(
kParam
)});
Sgd_desc
.
SetInput
(
"Grad"
,
{
fused_vars_name
.
at
(
"Grad"
)});
Sgd_desc
.
SetInput
(
kGrad
,
{
fused_vars_name
.
at
(
kGrad
)});
Sgd_desc
.
SetOutput
(
"ParamOut"
,
{
fused_vars_name
.
at
(
"Param"
)});
Sgd_desc
.
SetOutput
(
"ParamOut"
,
{
fused_vars_name
.
at
(
kParam
)});
// TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
// TODO(zcd): The LearningRate, Beta1Pow, Beta2Pow should be equal.
Sgd_desc
.
SetInput
(
"LearningRate"
,
sgd_ops
[
0
]
->
Op
()
->
Input
(
"LearningRate"
));
Sgd_desc
.
SetInput
(
kLearningRate
,
sgd_ops
[
0
]
->
Op
()
->
Input
(
kLearningRate
));
// NOTE: multi_devices_pass requires that every op should have a role.
// NOTE: multi_devices_pass requires that every op should have a role.
Sgd_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
Sgd_desc
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
op_role
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录