Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)

Unverified commit 1be6bf45, authored on Aug 12, 2020 by Yiqun Liu, committed via GitHub on Aug 12, 2020.

Add assign to fusion_group and enhance inplace execution in fusion_group. (#26121)

Parent: b2034c28
Showing 12 changed files with 231 additions and 187 deletions (+231 -187).
paddle/fluid/framework/ir/fusion_group/code_generator.cc              +60 -37
paddle/fluid/framework/ir/fusion_group/code_generator.h               +1  -1
paddle/fluid/framework/ir/fusion_group/code_generator_helper.h        +6  -6
paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc       +2  -3
paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.cc  +1  -1
paddle/fluid/framework/ir/fusion_group/fusion_group_pass.cc           +29 -39
paddle/fluid/framework/ir/fusion_group/operation.cc                   +13 -9
paddle/fluid/framework/ir/fusion_group/subgraph.h                     +62 -40
paddle/fluid/operators/fused/fusion_group_op.cc                       +18 -9
paddle/fluid/operators/fused/fusion_group_op.h                        +16 -16
paddle/fluid/operators/fused/fusion_group_op_test.cc                  +12 -14
python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py   +11 -12
paddle/fluid/framework/ir/fusion_group/code_generator.cc

@@ -68,11 +68,35 @@ static bool HasInput(Node* n, std::string name) {
   return input_names_set.find(name) != input_names_set.end();
 }
 
+static Node* GetInputVar(Node* n, const std::string& name) {
+  PADDLE_ENFORCE_EQ(n && n->IsOp() && n->Op(), true,
+                    platform::errors::InvalidArgument(
+                        "Expected node %p to be an operator node.", n));
+  for (auto* in : n->inputs) {
+    if (in->Name() == name) {
+      return in;
+    }
+  }
+  return nullptr;
+}
+
+static Node* GetOutputVar(Node* n, const std::string& name) {
+  PADDLE_ENFORCE_EQ(n && n->IsOp() && n->Op(), true,
+                    platform::errors::InvalidArgument(
+                        "Expected node %p to be an operator node.", n));
+  for (auto* out : n->outputs) {
+    if (out->Name() == name) {
+      return out;
+    }
+  }
+  return nullptr;
+}
+
 std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
     SubGraph* subgraph) {
-  std::unordered_map<std::string, int> var_ids = EncodeVarNodes(subgraph);
-  std::vector<Node*> intermediate_out_nodes =
-      subgraph->GetIntermediateOutVarNodes();
+  std::unordered_map<Node*, int> var_ids = EncodeVarNodes(subgraph);
+  std::unordered_set<Node*> intermediate_out_vars_set =
+      subgraph->GetIntermediateOutVarNodesSet();
   std::vector<OperationExpression> expressions;
   for (auto* node : subgraph->SortedNodes()) {
     if (node && node->IsOp() && node->Op()) {

@@ -92,11 +116,12 @@ std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
         // "elementwise_add_grad", where "X", "Y" and "Out" are not used.
         if ((HasInput(node, name) && op->Input(name).size() >= 1U)) {
           for (size_t i = 0; i < op->Input(name).size(); i++) {
+            Node* input_var = GetInputVar(node, op->Input(name)[i]);
             PADDLE_ENFORCE_NE(
-                var_ids.find(op->Input(name)[i]), var_ids.end(),
+                var_ids.find(input_var), var_ids.end(),
                 platform::errors::InvalidArgument(
                     "Input(%s) of operation %s is not set.", name, op->Type()));
-            input_ids.push_back(var_ids[op->Input(name)[i]]);
+            input_ids.push_back(var_ids[input_var]);
           }
         } else {
           input_ids.push_back(-1);

@@ -106,31 +131,29 @@ std::vector<OperationExpression> CodeGenerator::ConvertToExpressions(
       // Output ids should be set in fixed order, like:
       //  - dx, dy in backward operations
       std::vector<int> output_ids;
+      std::vector<int> intermediate_output_ids;
       std::vector<std::string> output_names =
           OperationMap::Instance().Get(op->Type()).output_names;
-      std::unordered_map<int, bool> intermediate_state;
 
       for (auto& name : output_names) {
+        Node* output_var = GetOutputVar(node, op->Output(name)[0]);
         PADDLE_ENFORCE_NE(
-            var_ids.find(op->Output(name)[0]), var_ids.end(),
+            var_ids.find(output_var), var_ids.end(),
            platform::errors::InvalidArgument(
                "Output(%s) of operation %s is not set.", name, op->Type()));
-        output_ids.push_back(var_ids[op->Output(name)[0]]);
-        bool enable_intermediate = false;
-        for (auto* n : intermediate_out_nodes) {
-          if (n->Name() == op->Output(name)[0]) {
-            enable_intermediate = true;
-            break;
-          }
-        }
-        intermediate_state[var_ids[op->Output(name)[0]]] = enable_intermediate;
+        output_ids.push_back(var_ids[output_var]);
+        if (!subgraph->SaveIntermediateOut() &&
+            intermediate_out_vars_set.find(output_var) !=
+                intermediate_out_vars_set.end()) {
+          intermediate_output_ids.push_back(var_ids[output_var]);
+        }
       }
 
       std::string lhs_type = ExtractDataType(node->outputs);
       std::string rhs_type = ExtractDataType(node->inputs);
       auto expression =
           OperationExpression(node->Name(), input_ids, output_ids, rhs_type,
-                              lhs_type, intermediate_state);
+                              lhs_type, intermediate_output_ids);
       expression.SetAttr(attr);
       expressions.push_back(expression);
     }

@@ -146,17 +169,18 @@ std::string CodeGenerator::Generate(
   // TODO(liuyiqun): Check whether all expressions are elementwise operations.
   std::set<int> input_ids = std::move(DistilInputIds(expressions));
   std::set<int> output_ids = std::move(DistilOutputIds(expressions));
-  std::set<int> intermediate_ids = std::move(DistilIntermediateIds(expressions));
+  std::set<int> intermediate_output_ids =
+      std::move(DistilIntermediateIds(expressions));
   std::unordered_map<int, std::string> dtypes = std::move(DistilDtypes(expressions));
   TemplateVariable template_var;
   template_var.Add("func_name", func_name);
-  template_var.Add("parameters",
-                   EmitParameters(input_ids, output_ids, intermediate_ids, dtypes));
+  template_var.Add("parameters",
+                   EmitParameters(input_ids, output_ids, intermediate_output_ids, dtypes));
   template_var.Add("compute_body",
-                   EmitComputeBody(expressions, input_ids, output_ids,
-                                   intermediate_ids, dtypes));
+                   EmitComputeBody(expressions, input_ids, output_ids,
+                                   intermediate_output_ids, dtypes));
   std::set<std::string> all_dtype;
   for (const auto& type : dtypes) {

@@ -204,18 +228,14 @@ std::set<int> CodeGenerator::DistilOutputIds(
 std::set<int> CodeGenerator::DistilIntermediateIds(
     const std::vector<OperationExpression>& expressions) {
-  std::set<int> intermediate_ids;
+  std::set<int> intermediate_output_ids;
   // Use std::set to remove the reptead id and get a ordered list.
   for (size_t i = 0; i < expressions.size(); i++) {
-    for (auto id : expressions[i].GetOutputIds()) {
-      auto intermediate_state = expressions[i].GetIntermediateState();
-      if (intermediate_state.find(id) != intermediate_state.end() &&
-          intermediate_state[id]) {
-        intermediate_ids.insert(id);
-      }
+    for (auto id : expressions[i].GetIntermediateOutputIds()) {
+      intermediate_output_ids.insert(id);
     }
   }
-  return intermediate_ids;
+  return intermediate_output_ids;
 }
 
 std::unordered_map<int, std::string> CodeGenerator::DistilDtypes(

@@ -316,26 +336,29 @@ std::string CodeGenerator::EmitComputeBody(
   return load.str() + compute.str() + store.str();
 }
 
-std::unordered_map<std::string, int> CodeGenerator::EncodeVarNodes(
+std::unordered_map<Node*, int> CodeGenerator::EncodeVarNodes(
     SubGraph* subgraph) {
   const auto& input_var_nodes = subgraph->GetInputVarNodes();
-  const auto& output_var_nodes = subgraph->GetOutputVarNodes();
+  // Encode all var nodes, including intermediate output var nodes.
+  const auto& output_var_nodes = subgraph->GetOutputVarNodes(true);
 
   int id = 0;
-  std::unordered_map<std::string, int> var_ids;
+  std::unordered_map<Node*, int> var_ids;
   // Numbering input vars.
   for (auto* in : input_var_nodes) {
-    VLOG(3) << "Encoding input names:" << in->Name() << ", id:" << id;
-    if (var_ids.find(in->Name()) == var_ids.end()) {
-      var_ids[in->Name()] = id++;
+    VLOG(3) << "Encoding input names:" << in->Name() << "(" << in
+            << "), id:" << id;
+    if (var_ids.find(in) == var_ids.end()) {
+      var_ids[in] = id++;
     }
   }
   // Encoding output vars.
   for (auto* out : output_var_nodes) {
-    VLOG(3) << "Ecoding output names:" << out->Name() << ", id:" << id;
-    if (var_ids.find(out->Name()) == var_ids.end()) {
-      var_ids[out->Name()] = id++;
+    VLOG(3) << "Ecoding output names:" << out->Name() << "(" << out
+            << "), id:" << id;
+    if (var_ids.find(out) == var_ids.end()) {
+      var_ids[out] = id++;
     }
   }
   return var_ids;
paddle/fluid/framework/ir/fusion_group/code_generator.h

@@ -61,7 +61,7 @@ class CodeGenerator {
       const std::unordered_map<int, std::string>& dtypes) const;
 
   // Encode all var nodes in the subgraph with an unique number.
-  std::unordered_map<std::string, int> EncodeVarNodes(SubGraph* subgraph);
+  std::unordered_map<Node*, int> EncodeVarNodes(SubGraph* subgraph);
 
  private:
   std::vector<CodeTemplate> code_templates_;
paddle/fluid/framework/ir/fusion_group/code_generator_helper.h

@@ -48,20 +48,20 @@ class OperationExpression {
                       std::string op_type, const std::vector<int>& input_ids,
                       const std::vector<int>& output_ids,
                       std::string rhs_type, std::string lhs_type,
-                      const std::unordered_map<int, bool>& intermediate_state = {})
+                      const std::vector<int>& intermediate_output_ids = {})
       : op_type_(op_type),
         input_ids_(input_ids),
         output_ids_(output_ids),
         rhs_type_(rhs_type),
         lhs_type_(lhs_type),
-        intermediate_state_(intermediate_state) {}
+        intermediate_output_ids_(intermediate_output_ids) {}
 
   std::string GetOpType() const { return op_type_; }
-  std::unordered_map<int, bool> GetIntermediateState() const {
-    return intermediate_state_;
-  }
   std::vector<int> GetInputIds() const { return input_ids_; }
   std::vector<int> GetOutputIds() const { return output_ids_; }
+  std::vector<int> GetIntermediateOutputIds() const {
+    return intermediate_output_ids_;
+  }
   std::string GetRHSType() const { return rhs_type_; }
   std::string GetLHSType() const { return lhs_type_; }
   void SetAttr(AttributeMap attr) { attr_ = attr; }

@@ -84,7 +84,7 @@ class OperationExpression {
   AttributeMap attr_;
   std::string rhs_type_;
   std::string lhs_type_;
-  std::unordered_map<int, bool> intermediate_state_;
+  std::vector<int> intermediate_output_ids_;
 };
 
 class TemplateVariable {
paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc

@@ -144,7 +144,6 @@ void CheckOutput(const std::vector<OperationExpression>& expressions,
       LOG(INFO) << "Precision check failed from i = " << id
                 << ", expect: " << expect << ", actual: " << actual;
-      EXPECT_LT(fabs(actual - expect), eps);
       break;
     }
   }
 }

@@ -465,7 +464,7 @@ TEST(code_generator, subgraph) {
   for (std::string dtype : {"float", "__half"}) {
     std::unique_ptr<paddle::framework::ir::Graph> graph = BuildGraph(false, dtype);
-    fusion_group::SubGraph subgraph(0, "elementwise_kernel_1", false,
+    fusion_group::SubGraph subgraph(0, "elementwise_kernel_1", true,
                                     graph->Nodes());
     // Expressions generated by code_generator (they may be different):

@@ -484,7 +483,7 @@ TEST(code_generator, subgraph_grad) {
   for (std::string dtype : {"float", "__half"}) {
     std::unique_ptr<paddle::framework::ir::Graph> graph = BuildGraph(true, dtype);
-    fusion_group::SubGraph subgraph(0, "elementwise_grad_kernel_1", false,
+    fusion_group::SubGraph subgraph(0, "elementwise_grad_kernel_1", true,
                                     DistilGradNodes(graph));
     // Expressions generated by code_generator (they may be different):
paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.cc

@@ -63,7 +63,7 @@ static bool IsEqualAndNotEmpty(const std::vector<int64_t>& l,
 bool GroupDetector::CheckPrecondition(const Node* n) {
   auto check_data_type = [&](const std::vector<Node*>& nodes) -> bool {
     bool is_first = true;
-    proto::VarType::Type data_type_0;
+    proto::VarType::Type data_type_0 = proto::VarType::BOOL;
     for (auto* n : nodes) {
       if (n && n->IsVar() && n->Var()) {
         if (n->Var()->GetType() != proto::VarType::LOD_TENSOR) {
paddle/fluid/framework/ir/fusion_group/fusion_group_pass.cc

@@ -63,11 +63,6 @@ int FusionGroupPass::DetectFusionGroup(Graph* graph, int type) const {
                                        std::unordered_set<Node*>(vec.begin(), vec.end()));
     VLOG(3) << "subgraph: {\n" << DebugString(subgraph.SortedNodes()) << "}\n";
-    // In elementwise fused kernel, memory is the bound of execution,
-    // here we remove the output id to use less memory and less time.
-    if (subgraph.RemoveIntermediateOut()) {
-      subgraph.DetectIntermediateOutWithGraph(graph);
-    }
     if (subgraph.IsValid(min_subgraph_size)) {
       subgraph.SetFuncName("fused_elementwise_" + std::to_string(index++));
       if (GenerateCode(&subgraph)) {

@@ -115,57 +110,52 @@ static int ExtractOpRole(fusion_group::SubGraph* subgraph) {
 void FusionGroupPass::InsertFusionGroupOp(
     Graph* graph, fusion_group::SubGraph* subgraph) const {
-  const std::vector<Node*>& input_vars_of_subgraph = subgraph->GetInputVarNodes();
-  const std::vector<Node*>& output_vars_of_subgraph = subgraph->GetOutputVarNodes();
-  const std::vector<Node*> intermediate_vars_of_subgraph =
-      subgraph->GetIntermediateOutVarNodes();
+  const std::vector<Node*>& input_vars = subgraph->GetInputVarNodes();
+  const std::vector<Node*>& output_vars =
+      subgraph->GetOutputVarNodes(subgraph->SaveIntermediateOut());
   std::unordered_set<Node*> external_nodes;
 
-  OpDesc op_desc;
-  op_desc.SetType("fusion_group");
   // Prepare inputs.
   std::vector<std::string> input_names;
-  std::vector<std::string> inputs_data_types;
-  for (auto* n : input_vars_of_subgraph) {
-    input_names.push_back(n->Name());
-    inputs_data_types.push_back(DataTypeToString(n->Var()->GetDataType()));
-    external_nodes.insert(n);
+  std::vector<int> input_dtypes;
+  std::unordered_set<Node*> output_vars_set(output_vars.begin(), output_vars.end());
+  for (auto* n : input_vars) {
+    // It is not an output var node.
+    if (output_vars_set.find(n) == output_vars_set.end()) {
+      input_names.push_back(n->Name());
+      input_dtypes.push_back(n->Var()->GetDataType());
+      external_nodes.insert(n);
+    }
   }
-  op_desc.SetInput("Inputs", input_names);
 
   // Prepare outputs.
   std::vector<std::string> output_names;
-  std::vector<std::string> outs_data_types;
-  std::vector<Node*> output_var_without_intermediate;
-  for (auto* n : output_vars_of_subgraph) {
-    auto it_input = find(input_vars_of_subgraph.begin(),
-                         input_vars_of_subgraph.end(), n);
-    auto it_intermediate = find(intermediate_vars_of_subgraph.begin(),
-                                intermediate_vars_of_subgraph.end(), n);
-    if (it_intermediate == intermediate_vars_of_subgraph.end() &&
-        it_input == input_vars_of_subgraph.end()) {
-      output_names.push_back(n->Name());
-      outs_data_types.push_back(DataTypeToString(n->Var()->GetDataType()));
-      output_var_without_intermediate.push_back(n);
-    }
+  std::vector<int> output_dtypes;
+  for (auto* n : output_vars) {
+    output_names.push_back(n->Name());
+    output_dtypes.push_back(n->Var()->GetDataType());
     external_nodes.insert(n);
   }
 
+  OpDesc op_desc;
+  op_desc.SetType("fusion_group");
+  op_desc.SetInput("Inputs", input_names);
   op_desc.SetOutput("Outs", output_names);
-  op_desc.SetAttr("inputs_data_type", inputs_data_types);
-  op_desc.SetAttr("outs_data_type", outs_data_types);
+  op_desc.SetAttr("inputs_dtype", input_dtypes);
+  op_desc.SetAttr("outs_dtype", output_dtypes);
   op_desc.SetAttr("type", subgraph->GetType());
   op_desc.SetAttr("func_name", subgraph->GetFuncName());
   op_desc.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(), ExtractOpRole(subgraph));
 
   Node* fusion_group_node = graph->CreateOpNode(&op_desc);
-  for (auto* in : input_vars_of_subgraph) {
-    IR_NODE_LINK_TO(in, fusion_group_node);
+  for (auto* in : input_vars) {
+    if (output_vars_set.find(in) == output_vars_set.end()) {
+      IR_NODE_LINK_TO(in, fusion_group_node);
+    }
   }
 
-  for (auto* out : output_var_without_intermediate) {
+  for (auto* out : output_vars) {
     IR_NODE_LINK_TO(fusion_group_node, out);
   }
paddle/fluid/framework/ir/fusion_group/operation.cc

@@ -105,12 +105,6 @@ void OperationMap::InsertUnaryElementwiseOperations() {
   insert_handler("tanh", "%{2.0} / (%{1.0} + Exp(-%{2.0} * ${0})) - %{1.0}",
                  {"${2} * (%{1.0} - ${1} * ${1})"});
-  // cast:
-  //  out = static_cast<T>(x)
-  // TODO(wangchaochaohu): This is not the compelete definition of
-  //  cast Op, We need refine it later.
-  insert_handler("cast", "${0}", {});
   // sqrt:
   //  out = x^(1/2)
   //  dx = dout * 0.5 / out

@@ -121,11 +115,21 @@ void OperationMap::InsertUnaryElementwiseOperations() {
   //  dx = dout * 2.0 * x
   insert_handler("square", "${0} * ${0}", {"${2} * %{2.0} * ${0}"});
 
+  // assign:
+  //  out = x
+  insert_handler("assign", "${0}", {});
+  // cast:
+  //  out = static_cast<T>(x)
+  // TODO(wangchaochaohu): This is not the compelete definition of
+  //  cast Op, We need refine it later.
+  insert_handler("cast", "${0}", {});
+
   // scale
-  // out = (bias_after_scale) ? scale * X + bias : scale(X + bias)
-  // here we use '=' operator to seperate th default value
+  //  out = (bias_after_scale) ? scale * X + bias : scale(X + bias)
+  //  here we use '=' operator to seperate th default value
   // TODO(wangchaochaohu): Later we need to support Tensor input for scale and
-  // bias.
+  //  bias.
   insert_handler("scale",
                  "${bias_after_scale=true} ? (${scale=%{1.0}} * ${0} + "
paddle/fluid/framework/ir/fusion_group/subgraph.h

@@ -66,11 +66,12 @@ class SubGraph {
   }
 
   int GetType() const { return type_; }
-  bool RemoveIntermediateOut() { return !save_intermediate_out_; }
 
   void SetFuncName(std::string func_name) { func_name_ = func_name; }
   std::string GetFuncName() const { return func_name_; }
+  bool SaveIntermediateOut() const { return save_intermediate_out_; }
 
   const std::unordered_set<Node*>& Nodes() const { return nodes_set_; }
   const std::vector<Node*>& SortedNodes() {
     if (!is_sorted_) {

@@ -118,66 +119,88 @@ class SubGraph {
     return input_vars;
   }
 
-  std::vector<Node*> GetOutputVarNodes() {
+  std::vector<Node*> GetOutputVarNodes(bool with_intermediate_out) {
     // The order of output nodes should be consistant anywhere..
-    std::vector<Node*> output_vars_all;
+    std::vector<Node*> output_vars;
     for (auto* n : SortedNodes()) {
-      if (n && n->IsVar() && n->Var()) {
-        bool is_found = false;
-        for (auto* in : n->inputs) {
-          if (Has(in)) {
-            is_found = true;
-          }
-        }
-        if (is_found) {
-          output_vars_all.push_back(n);
-        }
+      if (IsOutputOfInternalOp(n)) {
+        // If the var_node is the output of some op_node in the subgraph, it
+        // is considered the output var node of the subgraph.
+        if (with_intermediate_out) {
+          output_vars.push_back(n);
+        } else {
+          if (n->outputs.empty() || IsInputOfExternalOp(n)) {
+            output_vars.push_back(n);
+          }
+        }
       }
     }
-    return output_vars_all;
+    return output_vars;
   }
 
   std::vector<Node*> GetIntermediateOutVarNodes() {
-    return intermediate_out_nodes_;
+    // Intermediate output var nodes: the output of some op_node in the
+    // subgraph, but not referenced outside the subgraph.
+    std::vector<Node*> intermediate_out_vars;
+    for (auto* n : SortedNodes()) {
+      if (IsOutputOfInternalOp(n) && IsInputOfInternalOp(n) &&
+          !IsInputOfExternalOp(n)) {
+        // When the outputs size is 0, it is also considered a intermidiate
+        // output. It maybe an unused output or the fetching vars, so that we
+        // cannot eleiminate it directly here.
+        intermediate_out_vars.push_back(n);
+      }
+    }
+    return intermediate_out_vars;
   }
 
-  void DetectIntermediateOutWithGraph(Graph* graph) {
-    auto graph_nodes = graph->Nodes();
-    for (auto* n : SortedNodes()) {
-      bool enable_remove = true;
-      if (n && n->IsVar() && n->Var()) {
-        bool leaf_graph = true;
-        for (auto* node : graph_nodes) {
-          if (node->IsOp()) {
-            auto inputs = node->inputs;
-            for (auto* in : inputs) {
-              if (in && in->Name() == n->Name()) {
-                if (!Has(node)) enable_remove = false;
-                leaf_graph = false;
-              }
-            }
-          }
-          if (!enable_remove) {
-            break;
-          }
-          if (leaf_graph) enable_remove = false;
-        }
-      } else {
-        enable_remove = false;
-      }
-      if (enable_remove) {
-        intermediate_out_nodes_.push_back(n);
-      }
-    }
-  }
+  std::unordered_set<Node*> GetIntermediateOutVarNodesSet() {
+    std::vector<Node*> intermediate_out_vars = GetIntermediateOutVarNodes();
+    return std::unordered_set<Node*>(intermediate_out_vars.begin(),
+                                     intermediate_out_vars.end());
+  }
 
  private:
+  bool IsInputOfInternalOp(Node* n) {
+    bool is_input_of_internal_op = false;
+    if (Has(n) && n && n->IsVar() && n->Var()) {
+      for (auto* out : n->outputs) {
+        if (Has(out)) {
+          is_input_of_internal_op = true;
+          break;
+        }
+      }
+    }
+    return is_input_of_internal_op;
+  }
+
+  bool IsInputOfExternalOp(Node* n) {
+    // If n is the input any one node outside the subgraph.
+    bool is_input_of_external_op = false;
+    if (Has(n) && n && n->IsVar() && n->Var()) {
+      for (auto* out : n->outputs) {
+        if (!Has(out)) {
+          is_input_of_external_op = true;
+          break;
+        }
+      }
+    }
+    return is_input_of_external_op;
+  }
+
+  bool IsOutputOfInternalOp(Node* n) {
+    bool is_output_of_internal_op = false;
+    if (Has(n) && n && n->IsVar() && n->Var()) {
+      for (auto* in : n->inputs) {
+        if (Has(in)) {
+          is_output_of_internal_op = true;
+          break;
+        }
+      }
+    }
+    return is_output_of_internal_op;
+  }
+
   void TopologicalSort() {
     if (!is_sorted_) {
       std::unordered_map<Node*, std::vector<Node*>> inputs_map;

@@ -236,7 +259,6 @@ class SubGraph {
   bool save_intermediate_out_{true};
 
   std::unordered_set<Node*> nodes_set_;
-  std::vector<Node*> intermediate_out_nodes_{};
   bool is_sorted_{false};
   std::vector<Node*> sorted_nodes_;
 };
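The conceptual core of the subgraph.h change is how var nodes are classified. As an aside (not part of the commit, and not Paddle code), the rule the new IsOutputOfInternalOp / IsInputOfInternalOp / IsInputOfExternalOp helpers encode can be sketched in a few lines of plain Python; the function name and booleans below are illustrative only:

# Illustrative sketch only: a var produced by an op inside the subgraph is an output
# var; if every one of its consumers is also inside the subgraph, it is an
# "intermediate" output and can be dropped from the fused op's "Outs" when
# SaveIntermediateOut() is false. A var with no consumers is still kept, since it
# may be fetched later.
def is_intermediate_out(produced_inside, consumers_inside):
    """produced_inside: bool; consumers_inside: list of bool, one per consuming op."""
    if not produced_inside or not consumers_inside:
        return False
    return all(consumers_inside)

# written and read only inside the fused subgraph -> intermediate output
assert is_intermediate_out(True, [True, True]) is True
# feeds an op outside the subgraph -> must remain a real output of fusion_group
assert is_intermediate_out(True, [True, False]) is False
# no readers at all -> might be fetched, so it stays a real output as well
assert is_intermediate_out(True, []) is False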
paddle/fluid/operators/fused/fusion_group_op.cc

@@ -22,8 +22,14 @@ class FusionGroupOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext* ctx) const override {
-    const size_t num_ins = ctx->Inputs("Inputs").size();
-    const size_t num_outs = ctx->Outputs("Outs").size();
+    OP_INOUT_CHECK(ctx->HasInputs("Inputs"), "Input", "Inputs", "FusionGroup");
+    OP_INOUT_CHECK(ctx->HasOutputs("Outs"), "Output", "Outs", "FusionGroup");
+
+    auto input_names = ctx->Inputs("Inputs");
+    auto output_names = ctx->Outputs("Outs");
+
+    const size_t num_ins = input_names.size();
+    const size_t num_outs = output_names.size();
 
     PADDLE_ENFORCE_GE(num_ins, 1UL,

@@ -42,9 +48,12 @@ class FusionGroupOp : public framework::OperatorWithKernel {
     std::vector<framework::DDim> x_dims = ctx->GetInputsDim("Inputs");
     if (type == 0) {
       for (size_t i = 1; i < num_ins; ++i) {
-        PADDLE_ENFORCE_EQ(x_dims[0], x_dims[i],
-                          platform::errors::InvalidArgument(
-                              "All the inputs' dims should be the same."));
+        PADDLE_ENFORCE_EQ(
+            x_dims[0], x_dims[i],
+            platform::errors::InvalidArgument(
+                "All the inputs' dims is expected to be the same. "
+                "But recieved [%s] (name: %s) vs [%s] (name: %s).",
+                x_dims[0], input_names[0], x_dims[i], input_names[i]));
       }
       std::vector<framework::DDim> out_dims;
       for (size_t j = 0; j < num_outs; ++j) {

@@ -76,11 +85,11 @@ class FusionGroupOpMaker : public framework::OpProtoAndCheckerMaker {
     AddOutput("Outs", "(std::vector<LoDTensor>) The outputs of fusion_group op.")
         .AsDuplicable();
-    AddAttr<std::vector<std::string>>("outs_data_type",
-                                      "The data type of Outputs in fusion_group op.")
+    AddAttr<std::vector<int>>("outs_dtype",
+                              "The data type of Outputs in fusion_group op.")
         .SetDefault({});
-    AddAttr<std::vector<std::string>>("inputs_data_type",
-                                      "The data type of Inputs in fusion_group op.")
+    AddAttr<std::vector<int>>("inputs_dtype",
+                              "The data type of Inputs in fusion_group op.")
         .SetDefault({});
     AddAttr<int>("type", "Fusion type.").SetDefault(0);
     AddAttr<std::string>("func_name", "Name of the generated functions.")
paddle/fluid/operators/fused/fusion_group_op.h

@@ -24,14 +24,14 @@ namespace operators {
 static void MutableMultiTypeData(
     std::vector<paddle::framework::LoDTensor*>* var,
-    const std::vector<std::string>& data_type, const platform::Place& place) {
+    const std::vector<int>& data_type, const platform::Place& place) {
   for (size_t i = 0; i < var->size(); i++) {
-    if (data_type[i] == "float") {
+    if (data_type[i] == framework::proto::VarType::FP32) {
       (*var)[i]->mutable_data<float>(place);
-    } else if (data_type[i] == "double") {
-      (*var)[i]->mutable_data<double>(place);
-    } else if (data_type[i] == "::paddle::platform::float16") {
+    } else if (data_type[i] == framework::proto::VarType::FP16) {
       (*var)[i]->mutable_data<paddle::platform::float16>(place);
+    } else if (data_type[i] == framework::proto::VarType::FP64) {
+      (*var)[i]->mutable_data<double>(place);
     }
   }
 }

@@ -43,15 +43,15 @@ class FusionGroupKernel : public framework::OpKernel<T> {
     auto ins = ctx.MultiInput<framework::LoDTensor>("Inputs");
     auto outs = ctx.MultiOutput<framework::LoDTensor>("Outs");
     int type = ctx.Attr<int>("type");
-    auto outs_type = ctx.Attr<std::vector<std::string>>("outs_data_type");
-    auto inputs_type = ctx.Attr<std::vector<std::string>>("inputs_data_type");
+    const auto& outs_dtype = ctx.Attr<std::vector<int>>("outs_dtype");
+    const auto& inputs_dtype = ctx.Attr<std::vector<int>>("inputs_dtype");
 
     size_t num_ins = ins.size();
     size_t num_outs = outs.size();
 
     auto place = ctx.GetPlace();
-    MutableMultiTypeData(&outs, outs_type, place);
+    MutableMultiTypeData(&outs, outs_dtype, place);
 
     std::string func_name = ctx.Attr<std::string>("func_name");
     platform::DeviceCode* dev_code =

@@ -64,22 +64,22 @@ class FusionGroupKernel : public framework::OpKernel<T> {
     args.push_back(&n);
     std::vector<const void*> ptrs(num_ins + num_outs);
     for (size_t i = 0; i < num_ins; ++i) {
-      if (inputs_type[i] == "::paddle::platform::float16") {
+      if (inputs_dtype[i] == framework::proto::VarType::FP16) {
         ptrs[i] = ins[i]->data<paddle::platform::float16>();
-      } else if (inputs_type[i] == "double") {
-        ptrs[i] = ins[i]->data<double>();
-      } else if (inputs_type[i] == "float") {
+      } else if (inputs_dtype[i] == framework::proto::VarType::FP32) {
         ptrs[i] = ins[i]->data<float>();
+      } else if (inputs_dtype[i] == framework::proto::VarType::FP64) {
+        ptrs[i] = ins[i]->data<double>();
       }
       args.push_back(&ptrs[i]);
     }
     for (size_t j = 0; j < num_outs; ++j) {
-      if (outs_type[j] == "::paddle::platform::float16") {
+      if (outs_dtype[j] == framework::proto::VarType::FP16) {
         ptrs[num_ins + j] = outs[j]->data<paddle::platform::float16>();
-      } else if (outs_type[j] == "double") {
-        ptrs[num_ins + j] = outs[j]->data<double>();
-      } else if (outs_type[j] == "float") {
+      } else if (outs_dtype[j] == framework::proto::VarType::FP32) {
         ptrs[num_ins + j] = outs[j]->data<float>();
+      } else if (outs_dtype[j] == framework::proto::VarType::FP64) {
+        ptrs[num_ins + j] = outs[j]->data<double>();
       }
       args.push_back(&ptrs[num_ins + j]);
     }
paddle/fluid/operators/fused/fusion_group_op_test.cc

@@ -57,10 +57,14 @@ framework::OpDesc* CreateFusionGroupOp(
     const std::vector<std::string>& input_names,
     const std::vector<std::vector<int64_t>>& input_shapes,
     const std::vector<std::string>& output_names, int type,
-    const std::vector<std::string>& inputs_data_type,
-    const std::vector<std::string>& outs_data_type, std::string func_name) {
+    std::string func_name) {
   EXPECT_EQ(input_names.size(), input_shapes.size());
+  std::vector<int> input_dtypes(input_names.size(),
+                                framework::proto::VarType::FP32);
+  std::vector<int> output_dtypes(output_names.size(),
+                                 framework::proto::VarType::FP32);
 
   for (size_t i = 0; i < input_names.size(); ++i) {
     auto* var = program->MutableBlock(0)->Var(input_names[i]);
     var->SetType(framework::proto::VarType::LOD_TENSOR);

@@ -77,8 +81,8 @@ framework::OpDesc* CreateFusionGroupOp(
   op->SetType("fusion_group");
   op->SetInput("Inputs", input_names);
   op->SetOutput("Outs", output_names);
-  op->SetAttr("inputs_data_type", inputs_data_type);
-  op->SetAttr("outs_data_type", outs_data_type);
+  op->SetAttr("inputs_dtype", input_dtypes);
+  op->SetAttr("outs_dtype", output_dtypes);
   op->SetAttr("type", type);
   op->SetAttr("func_name", func_name);
   op->SetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName(),

@@ -133,8 +137,6 @@ void CheckOutputs(framework::Scope* scope,
 void TestMain(const std::vector<std::string>& input_names,
               const std::vector<std::vector<int64_t>>& input_shapes,
               const std::vector<std::string>& output_names, int type,
-              const std::vector<std::string>& inputs_data_type,
-              const std::vector<std::string>& outs_data_type,
               std::string func_name, std::string cuda_kernel_str,
               CPUKernelFunc cpu_kernel_func) {
   // Compile the device code

@@ -144,9 +146,8 @@ void TestMain(const std::vector<std::string>& input_names,
   // Create a ProgramDesc that has a fusion_group_op.
   framework::ProgramDesc program;
-  framework::OpDesc* op_desc = CreateFusionGroupOp(
-      &program, input_names, input_shapes, output_names, type,
-      inputs_data_type, outs_data_type, func_name);
+  framework::OpDesc* op_desc = CreateFusionGroupOp(
+      &program, input_names, input_shapes, output_names, type, func_name);
   auto fusion_group_op = framework::OpRegistry::CreateOp(*op_desc);
 
   framework::Scope scope;

@@ -216,11 +217,8 @@ void elementwise_cuda_kernel_0(size_t n, float *x, float* y, float* z) {
   }
   };
 
-  std::vector<std::string> inputs_data_type(input_names.size(), "float");
-  std::vector<std::string> outs_data_type(output_names.size(), "float");
-  TestMain(input_names, input_shapes, output_names, 0, inputs_data_type,
-           outs_data_type, "elementwise_cuda_kernel_0", kernel,
-           elementwise_cpu_kernel_0);
+  TestMain(input_names, input_shapes, output_names, 0,
+           "elementwise_cuda_kernel_0", kernel, elementwise_cpu_kernel_0);
 }
 
 }  // namespace operators
python/paddle/fluid/tests/unittests/ir/test_ir_fusion_group_pass.py

@@ -77,12 +77,13 @@ class FusionGroupPassTest(PassTest):
         self.check_output_with_place(fluid.CUDAPlace(0))
 
 
-class FusionGroupPassTest1(FusionGroupPassTest):
+class FusionGroupPassComplicatedTest(FusionGroupPassTest):
     def build_program(self, dtype):
         with fluid.program_guard(self.main_program, self.startup_program):
-            self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 5)
+            self.feed_vars = self._prepare_feed_vars([32, 64], dtype, 5)
 
-            tmp_0 = layers.assign(self.feed_vars[0])
+            one = layers.fill_constant(shape=[1], dtype=dtype, value=1.0)
+            tmp_0 = one * self.feed_vars[0]
             # subgraph with 9 op nodes
             tmp_1 = tmp_0 * layers.sigmoid(self.feed_vars[1]) + layers.sigmoid(
                 self.feed_vars[2]) * layers.tanh(self.feed_vars[3])

@@ -94,7 +95,7 @@ class FusionGroupPassTest1(FusionGroupPassTest):
             self.fetch_list = [tmp_2, self.grad(tmp_0)]
 
 
-class FusionGroupPassTest2(FusionGroupPassTest):
+class FusionGroupPassInplaceTest(FusionGroupPassTest):
     def build_program(self, dtype):
         with fluid.program_guard(self.main_program, self.startup_program):
             self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 3)

@@ -103,15 +104,13 @@ class FusionGroupPassTest2(FusionGroupPassTest):
                     name="data3", shape=[128, 32], dtype=dtype))
 
             # subgraph with 3 op node
-            tmp_0 = self.feed_vars[0] + self.feed_vars[1]
-            tmp_1 = layers.relu(self.feed_vars[2] * tmp_0)
-            # subgraph with 2 op nodes
-            tmp_2 = layers.relu(layers.sigmoid(self.feed_vars[3]))
-            tmp_3 = layers.mul(tmp_1, tmp_2)
+            tmp_0 = self.feed_vars[0] - self.feed_vars[1]
+            tmp_1 = tmp_0 * self.feed_vars[2]
+            tmp_2 = layers.assign(tmp_1, output=tmp_0)
+            tmp_3 = layers.mul(tmp_2, self.feed_vars[3])
 
             self.append_gradients(tmp_3)
 
-            self.num_fused_ops = 2
-            self.fetch_list = [tmp_3, self.grad(tmp_1)]
+            self.num_fused_ops = 1
+            self.fetch_list = [tmp_3]
 
 
 class FusionGroupPassTestFP64(FusionGroupPassTest):
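For reference, a minimal standalone sketch (not part of the commit; shapes and variable names are illustrative) of the in-place pattern the renamed FusionGroupPassInplaceTest exercises. layers.assign(x, output=y) writes its result back into y's buffer, so the fused elementwise kernel has to treat y as both an input and an output; with the fusion_group pass enabled (the PassTest harness in this directory does that), the three elementwise ops are expected to collapse into a single fusion_group op, while mul stays outside the fused subgraph.

import paddle.fluid as fluid
import paddle.fluid.layers as layers

main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
    x0 = fluid.data(name="data0", shape=[32, 128], dtype="float32")
    x1 = fluid.data(name="data1", shape=[32, 128], dtype="float32")
    x2 = fluid.data(name="data2", shape=[32, 128], dtype="float32")
    x3 = fluid.data(name="data3", shape=[128, 32], dtype="float32")

    tmp_0 = x0 - x1                              # elementwise_sub
    tmp_1 = tmp_0 * x2                           # elementwise_mul
    tmp_2 = layers.assign(tmp_1, output=tmp_0)   # assign writes back into tmp_0 (in-place)
    tmp_3 = layers.mul(tmp_2, x3)                # matmul-style mul, not an elementwise op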