Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
552ed8d8
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
552ed8d8
编写于
7月 19, 2023
作者:
C
csy0225
提交者:
GitHub
7月 19, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Delete repeat ops add gather squeeze unsqueeze (#55371)
上级
bc153701
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
788 addition
and
19 deletion
+788
-19
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+1
-0
paddle/fluid/framework/ir/delete_repeated_ops_pass.cc
paddle/fluid/framework/ir/delete_repeated_ops_pass.cc
+59
-18
paddle/fluid/framework/ir/fused_continuous_same_ops_pass.cc
paddle/fluid/framework/ir/fused_continuous_same_ops_pass.cc
+237
-0
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+2
-1
test/ir/inference/test_xpu_delete_repeated_ops_pass.py
test/ir/inference/test_xpu_delete_repeated_ops_pass.py
+347
-0
test/ir/inference/test_xpu_fused_continuous_same_ops_pass.py
test/ir/inference/test_xpu_fused_continuous_same_ops_pass.py
+142
-0
未找到文件。
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
552ed8d8
...
@@ -129,6 +129,7 @@ pass_library(dense_multihead_matmul_to_sparse_pass inference)
...
@@ -129,6 +129,7 @@ pass_library(dense_multihead_matmul_to_sparse_pass inference)
pass_library
(
delete_cast_op_pass inference
)
pass_library
(
delete_cast_op_pass inference
)
pass_library
(
delete_elementwise_mul_op_pass inference
)
pass_library
(
delete_elementwise_mul_op_pass inference
)
pass_library
(
delete_repeated_ops_pass inference
)
pass_library
(
delete_repeated_ops_pass inference
)
pass_library
(
fused_continuous_same_ops_pass inference
)
pass_library
(
sigmoid_elementmul_fuse_pass inference
)
pass_library
(
sigmoid_elementmul_fuse_pass inference
)
pass_library
(
generate_pass DEPS pass_desc_proto
)
pass_library
(
generate_pass DEPS pass_desc_proto
)
target_link_libraries
(
generate_pass pass_desc_proto
)
target_link_libraries
(
generate_pass pass_desc_proto
)
...
...
paddle/fluid/framework/ir/delete_repeated_ops_pass.cc
浏览文件 @
552ed8d8
...
@@ -101,18 +101,18 @@ class DeleteRepeatedOpsPass : public FusePassBase {
...
@@ -101,18 +101,18 @@ class DeleteRepeatedOpsPass : public FusePassBase {
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
private:
private:
void
DeleteRepeatedOps
(
void
DeleteRepeatedOps
(
ir
::
Graph
*
graph
,
ir
::
Graph
*
graph
,
const
std
::
string
&
op_type
,
const
std
::
string
&
op_type
,
std
::
function
<
std
::
string
(
OpDesc
*
)
>
gen_op_key_fn
)
const
;
std
::
function
<
std
::
string
(
Node
*
)
>
gen_op_key_fn
)
const
;
const
std
::
string
name_scope_
{
"delete_repeated_ops_pass"
};
const
std
::
string
name_scope_
{
"delete_repeated_ops_pass"
};
mutable
int
delete_op_count
{
0
};
};
};
void
DeleteRepeatedOpsPass
::
DeleteRepeatedOps
(
void
DeleteRepeatedOpsPass
::
DeleteRepeatedOps
(
ir
::
Graph
*
graph
,
ir
::
Graph
*
graph
,
const
std
::
string
&
op_type
,
const
std
::
string
&
op_type
,
std
::
function
<
std
::
string
(
OpDesc
*
)
>
gen_op_key_fn
)
const
{
std
::
function
<
std
::
string
(
Node
*
)
>
gen_op_key_fn
)
const
{
GraphPatternDetector
gpd
;
GraphPatternDetector
gpd
;
patterns
::
VarWithRepeatedOpsPattern
pattern
(
patterns
::
VarWithRepeatedOpsPattern
pattern
(
gpd
.
mutable_pattern
(),
name_scope_
,
op_type
);
gpd
.
mutable_pattern
(),
name_scope_
,
op_type
);
...
@@ -140,7 +140,7 @@ void DeleteRepeatedOpsPass::DeleteRepeatedOps(
...
@@ -140,7 +140,7 @@ void DeleteRepeatedOpsPass::DeleteRepeatedOps(
}
}
}
}
if
(
out_op_is_invalid
)
continue
;
if
(
out_op_is_invalid
)
continue
;
auto
attr_key
=
gen_op_key_fn
(
op
->
Op
()
);
auto
attr_key
=
gen_op_key_fn
(
op
);
ops_map
[
attr_key
].
push_back
(
op
);
ops_map
[
attr_key
].
push_back
(
op
);
}
}
for
(
auto
iter
=
ops_map
.
begin
();
iter
!=
ops_map
.
end
();)
{
for
(
auto
iter
=
ops_map
.
begin
();
iter
!=
ops_map
.
end
();)
{
...
@@ -173,16 +173,18 @@ void DeleteRepeatedOpsPass::DeleteRepeatedOps(
...
@@ -173,16 +173,18 @@ void DeleteRepeatedOpsPass::DeleteRepeatedOps(
};
};
gpd
(
graph
,
handler
);
gpd
(
graph
,
handler
);
delete_op_count
+=
delete_counts
;
if
(
delete_counts
>
0
)
{
if
(
delete_counts
>
0
)
{
LOG
(
INFO
)
<<
"--- delete "
<<
delete_counts
<<
" repeated "
<<
op_type
LOG
(
INFO
)
<<
"--- delete "
<<
delete_counts
<<
" repeated "
<<
op_type
<<
" ops"
;
<<
" ops"
;
}
}
}
}
std
::
string
GenShapeAttrKey
(
OpDesc
*
slice_op_desc
)
{
return
""
;
}
std
::
string
GenShapeAttrKey
(
Node
*
shape_op_node
)
{
return
""
;
}
std
::
string
GenSliceAttrKey
(
OpDesc
*
slice_op_desc
)
{
std
::
string
GenSliceAttrKey
(
Node
*
slice_op_node
)
{
std
::
string
attr_key
;
std
::
string
attr_key
;
auto
slice_op_desc
=
slice_op_node
->
Op
();
auto
starts
=
slice_op_desc
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"starts"
);
auto
starts
=
slice_op_desc
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"starts"
);
auto
ends
=
slice_op_desc
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"ends"
);
auto
ends
=
slice_op_desc
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"ends"
);
auto
axes
=
slice_op_desc
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"axes"
);
auto
axes
=
slice_op_desc
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"axes"
);
...
@@ -207,21 +209,24 @@ std::string GenSliceAttrKey(OpDesc* slice_op_desc) {
...
@@ -207,21 +209,24 @@ std::string GenSliceAttrKey(OpDesc* slice_op_desc) {
return
attr_key
;
return
attr_key
;
}
}
std
::
string
GenCastAttrKey
(
OpDesc
*
cast_op_desc
)
{
std
::
string
GenCastAttrKey
(
Node
*
cast_op_node
)
{
auto
cast_op_desc
=
cast_op_node
->
Op
();
auto
in_dtype
=
cast_op_desc
->
GetAttrIfExists
<
int
>
(
"in_dtype"
);
auto
in_dtype
=
cast_op_desc
->
GetAttrIfExists
<
int
>
(
"in_dtype"
);
auto
out_dtype
=
cast_op_desc
->
GetAttrIfExists
<
int
>
(
"out_dtype"
);
auto
out_dtype
=
cast_op_desc
->
GetAttrIfExists
<
int
>
(
"out_dtype"
);
return
"in_dtype_"
+
std
::
to_string
(
in_dtype
)
+
"_out_dtype_"
+
return
"in_dtype_"
+
std
::
to_string
(
in_dtype
)
+
"_out_dtype_"
+
std
::
to_string
(
out_dtype
);
std
::
to_string
(
out_dtype
);
}
}
std
::
string
GenAddAttrKey
(
OpDesc
*
add_op_desc
)
{
std
::
string
GenAddAttrKey
(
Node
*
add_op_node
)
{
auto
add_op_desc
=
add_op_node
->
Op
();
std
::
string
x_name
=
add_op_desc
->
Input
(
"X"
)[
0
];
std
::
string
x_name
=
add_op_desc
->
Input
(
"X"
)[
0
];
std
::
string
y_name
=
add_op_desc
->
Input
(
"Y"
)[
0
];
std
::
string
y_name
=
add_op_desc
->
Input
(
"Y"
)[
0
];
auto
axis
=
add_op_desc
->
GetAttrIfExists
<
int
>
(
"axis"
);
auto
axis
=
add_op_desc
->
GetAttrIfExists
<
int
>
(
"axis"
);
return
x_name
+
"_"
+
y_name
+
"_axis_"
+
std
::
to_string
(
axis
);
return
x_name
+
"_"
+
y_name
+
"_axis_"
+
std
::
to_string
(
axis
);
}
}
std
::
string
GenScaleAttrKey
(
OpDesc
*
scale_op_desc
)
{
std
::
string
GenScaleAttrKey
(
Node
*
scale_op_node
)
{
auto
scale_op_desc
=
scale_op_node
->
Op
();
auto
scale
=
scale_op_desc
->
GetAttrIfExists
<
float
>
(
"scale"
);
auto
scale
=
scale_op_desc
->
GetAttrIfExists
<
float
>
(
"scale"
);
auto
bias
=
scale_op_desc
->
GetAttrIfExists
<
float
>
(
"bias"
);
auto
bias
=
scale_op_desc
->
GetAttrIfExists
<
float
>
(
"bias"
);
auto
bias_after_scale
=
auto
bias_after_scale
=
...
@@ -230,17 +235,53 @@ std::string GenScaleAttrKey(OpDesc* scale_op_desc) {
...
@@ -230,17 +235,53 @@ std::string GenScaleAttrKey(OpDesc* scale_op_desc) {
"_bias_after_scale_"
+
std
::
to_string
(
bias_after_scale
);
"_bias_after_scale_"
+
std
::
to_string
(
bias_after_scale
);
}
}
std
::
string
GenGatherAttrKey
(
Node
*
gather_op_node
)
{
std
::
string
input_names
{
""
};
for
(
auto
input_var
:
gather_op_node
->
inputs
)
{
input_names
+=
input_var
->
Var
()
->
Name
();
}
auto
gather_op_desc
=
gather_op_node
->
Op
();
auto
axis
=
gather_op_desc
->
GetAttrIfExists
<
int
>
(
"axis"
);
return
"axis_"
+
std
::
to_string
(
axis
)
+
"_input_names_"
+
input_names
;
}
std
::
string
GenSqueeze2AttrKey
(
Node
*
squeeze2_op_node
)
{
auto
squeeze2_op_desc
=
squeeze2_op_node
->
Op
();
auto
axes
=
squeeze2_op_desc
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"axes"
);
std
::
string
attr_key
{
""
};
attr_key
+=
"axes_"
;
for
(
auto
axis
:
axes
)
{
attr_key
+=
std
::
to_string
(
axis
)
+
"_"
;
}
return
attr_key
;
}
void
DeleteRepeatedOpsPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
void
DeleteRepeatedOpsPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
PADDLE_ENFORCE_NOT_NULL
(
PADDLE_ENFORCE_NOT_NULL
(
graph
,
platform
::
errors
::
PreconditionNotMet
(
"graph should not be null."
));
graph
,
platform
::
errors
::
PreconditionNotMet
(
"graph should not be null."
));
Init
(
name_scope_
,
graph
);
Init
(
name_scope_
,
graph
);
int
repeat_time
=
0
;
int
total_delete_op_count
=
0
;
// This pass needs to loop run until there are no nodes in the graph that need
// to be deleted.
while
(
true
)
{
delete_op_count
=
0
;
DeleteRepeatedOps
(
graph
,
"shape"
,
GenShapeAttrKey
);
DeleteRepeatedOps
(
graph
,
"shape"
,
GenShapeAttrKey
);
DeleteRepeatedOps
(
graph
,
"slice"
,
GenSliceAttrKey
);
DeleteRepeatedOps
(
graph
,
"slice"
,
GenSliceAttrKey
);
DeleteRepeatedOps
(
graph
,
"cast"
,
GenCastAttrKey
);
DeleteRepeatedOps
(
graph
,
"cast"
,
GenCastAttrKey
);
DeleteRepeatedOps
(
graph
,
"elementwise_add"
,
GenAddAttrKey
);
DeleteRepeatedOps
(
graph
,
"elementwise_add"
,
GenAddAttrKey
);
DeleteRepeatedOps
(
graph
,
"scale"
,
GenScaleAttrKey
);
DeleteRepeatedOps
(
graph
,
"scale"
,
GenScaleAttrKey
);
DeleteRepeatedOps
(
graph
,
"cast"
,
GenCastAttrKey
);
DeleteRepeatedOps
(
graph
,
"gather"
,
GenGatherAttrKey
);
DeleteRepeatedOps
(
graph
,
"squeeze2"
,
GenSqueeze2AttrKey
);
DeleteRepeatedOps
(
graph
,
"unsqueeze2"
,
GenSqueeze2AttrKey
);
LOG
(
INFO
)
<<
"Round "
<<
repeat_time
++
<<
": delete op counts: "
<<
delete_op_count
;
total_delete_op_count
+=
delete_op_count
;
if
(
delete_op_count
==
0
)
{
break
;
// No node need to delete.
}
}
LOG
(
INFO
)
<<
"Total delete op counts: "
<<
total_delete_op_count
;
}
}
}
// namespace ir
}
// namespace ir
...
...
paddle/fluid/framework/ir/fused_continuous_same_ops_pass.cc
0 → 100644
浏览文件 @
552ed8d8
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
namespace
phi
{
class
DenseTensor
;
}
// namespace phi
namespace
paddle
{
namespace
framework
{
class
Scope
;
}
// namespace framework
}
// namespace paddle
namespace
paddle
{
namespace
framework
{
namespace
ir
{
namespace
patterns
{
struct
ContinuousSameOpsPattern
:
public
PatternBase
{
ContinuousSameOpsPattern
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
const
std
::
string
&
op_type
);
PATTERN_DECL_NODE
(
first_in_var_node
);
PATTERN_DECL_NODE
(
first_out_var_node
);
PATTERN_DECL_NODE
(
second_out_var_node
);
// declare op node's name
PATTERN_DECL_NODE
(
first_op_node
);
PATTERN_DECL_NODE
(
second_op_node
);
std
::
string
op_type_
;
};
ContinuousSameOpsPattern
::
ContinuousSameOpsPattern
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
,
const
std
::
string
&
op_type
)
:
PatternBase
(
pattern
,
name_scope
,
name_scope
),
op_type_
(
op_type
)
{
auto
*
first_in_var_node
=
pattern
->
NewNode
(
first_in_var_node_repr
())
->
assert_var_not_persistable
()
->
assert_is_op_input
(
op_type_
,
"X"
)
->
AsInput
()
->
assert_more
([
&
](
Node
*
node
)
{
// assert pre op type is not same.
auto
input_nodes
=
node
->
inputs
;
if
(
input_nodes
.
size
()
!=
1
)
return
false
;
if
(
!
input_nodes
.
empty
()
&&
input_nodes
[
0
]
->
IsOp
()
&&
input_nodes
[
0
]
->
Op
()
->
Type
()
==
op_type_
)
{
return
false
;
}
return
true
;
});
auto
*
first_op_node
=
pattern
->
NewNode
(
first_op_node_repr
())
->
assert_is_op
(
op_type_
);
auto
*
first_out_var_node
=
pattern
->
NewNode
(
first_out_var_node_repr
())
->
assert_var_not_persistable
()
->
assert_is_op_output
(
op_type_
,
"Out"
)
->
assert_has_n_outputs
(
1
);
first_op_node
->
LinksFrom
({
first_in_var_node
}).
LinksTo
({
first_out_var_node
});
auto
*
second_op_node
=
pattern
->
NewNode
(
second_op_node_repr
())
->
assert_is_op
(
op_type_
);
auto
*
second_out_var_node
=
pattern
->
NewNode
(
second_out_var_node_repr
())
->
assert_var_not_persistable
()
->
assert_is_op_output
(
op_type_
,
"Out"
)
->
AsOutput
();
second_op_node
->
LinksFrom
({
first_out_var_node
})
.
LinksTo
({
second_out_var_node
});
}
}
// namespace patterns
/*
Fused continuous same ops into one.
Origin graph:
input
|
|
unsqueeze2
|
|
unsqueeze2
|
|
unsqueeze2
|
|
out
After:
input
|
|
unsqueeze2
|
|
out
*/
class
FusedContinuousSameOpsPass
:
public
FusePassBase
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
private:
void
FusedReshapeOps
(
ir
::
Graph
*
graph
)
const
;
void
FusedUnsqueezeOps
(
ir
::
Graph
*
graph
)
const
;
const
std
::
string
name_scope_
{
"fused_continuous_same_ops_pass"
};
mutable
int
delete_op_count
{
0
};
};
void
FusedContinuousSameOpsPass
::
FusedReshapeOps
(
ir
::
Graph
*
graph
)
const
{
GraphPatternDetector
gpd
;
patterns
::
ContinuousSameOpsPattern
pattern
(
gpd
.
mutable_pattern
(),
name_scope_
,
"reshape2"
);
int
delete_counts
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
graph
)
{
VLOG
(
4
)
<<
"handle fused continuous reshape ops."
;
GET_IR_NODE_FROM_SUBGRAPH
(
first_in_var_node
,
first_in_var_node
,
pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
first_out_var_node
,
first_out_var_node
,
pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
second_out_var_node
,
second_out_var_node
,
pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
first_op_node
,
first_op_node
,
pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
second_op_node
,
second_op_node
,
pattern
);
auto
first_node_attr_shape
=
first_op_node
->
Op
()
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"shape"
);
if
(
first_node_attr_shape
.
empty
())
return
;
auto
second_node_attr_shape
=
second_op_node
->
Op
()
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"shape"
);
if
(
second_node_attr_shape
.
empty
())
return
;
second_op_node
->
Op
()
->
RenameInput
(
first_out_var_node
->
Name
(),
first_in_var_node
->
Name
());
IR_NODE_LINK_TO
(
first_in_var_node
,
second_op_node
);
GraphSafeRemoveNodes
(
graph
,
{
first_op_node
,
first_out_var_node
});
delete_counts
++
;
};
gpd
(
graph
,
handler
);
delete_op_count
+=
delete_counts
;
if
(
delete_counts
>
0
)
{
LOG
(
INFO
)
<<
"--- delete "
<<
delete_counts
<<
" repeated "
<<
"reshape2"
<<
" ops"
;
}
}
void
FusedContinuousSameOpsPass
::
FusedUnsqueezeOps
(
ir
::
Graph
*
graph
)
const
{
GraphPatternDetector
gpd
;
patterns
::
ContinuousSameOpsPattern
pattern
(
gpd
.
mutable_pattern
(),
name_scope_
,
"unsqueeze2"
);
int
delete_counts
=
0
;
auto
handler
=
[
&
](
const
GraphPatternDetector
::
subgraph_t
&
subgraph
,
Graph
*
graph
)
{
VLOG
(
4
)
<<
"handle fused continuous unsqueeze ops."
;
GET_IR_NODE_FROM_SUBGRAPH
(
first_in_var_node
,
first_in_var_node
,
pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
first_out_var_node
,
first_out_var_node
,
pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
second_out_var_node
,
second_out_var_node
,
pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
first_op_node
,
first_op_node
,
pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
second_op_node
,
second_op_node
,
pattern
);
auto
first_node_attr_axes
=
first_op_node
->
Op
()
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"axes"
);
if
(
first_node_attr_axes
.
empty
())
return
;
auto
second_node_attr_axes
=
second_op_node
->
Op
()
->
GetAttrIfExists
<
std
::
vector
<
int
>>
(
"axes"
);
if
(
second_node_attr_axes
.
empty
())
return
;
second_op_node
->
Op
()
->
RenameInput
(
first_out_var_node
->
Name
(),
first_in_var_node
->
Name
());
second_node_attr_axes
.
insert
(
second_node_attr_axes
.
begin
(),
first_node_attr_axes
.
begin
(),
first_node_attr_axes
.
end
());
second_op_node
->
Op
()
->
SetAttr
(
"axes"
,
second_node_attr_axes
);
IR_NODE_LINK_TO
(
first_in_var_node
,
second_op_node
);
GraphSafeRemoveNodes
(
graph
,
{
first_op_node
,
first_out_var_node
});
delete_counts
++
;
};
gpd
(
graph
,
handler
);
delete_op_count
+=
delete_counts
;
if
(
delete_counts
>
0
)
{
LOG
(
INFO
)
<<
"--- delete "
<<
delete_counts
<<
" repeated "
<<
"unsqueeze2"
<<
" ops"
;
}
}
void
FusedContinuousSameOpsPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
PADDLE_ENFORCE_NOT_NULL
(
graph
,
platform
::
errors
::
PreconditionNotMet
(
"graph should not be null."
));
Init
(
name_scope_
,
graph
);
int
repeat_time
=
0
;
int
total_delete_op_count
=
0
;
// This pass needs to loop run until there are no nodes in the graph that need
// to be deleted.
while
(
true
)
{
delete_op_count
=
0
;
FusedReshapeOps
(
graph
);
FusedUnsqueezeOps
(
graph
);
LOG
(
INFO
)
<<
"Round "
<<
repeat_time
++
<<
": delete op counts: "
<<
delete_op_count
;
total_delete_op_count
+=
delete_op_count
;
if
(
delete_op_count
==
0
)
{
LOG
(
INFO
)
<<
"--- no nodes need to delete --- break"
;
break
;
// No node need to delete.
}
}
LOG
(
INFO
)
<<
"Total delete op counts: "
<<
total_delete_op_count
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
fused_continuous_same_ops_pass
,
paddle
::
framework
::
ir
::
FusedContinuousSameOpsPass
);
REGISTER_PASS_CAPABILITY
(
fused_continuous_same_ops_pass
)
.
AddCombination
(
paddle
::
framework
::
compatible
::
OpVersionComparatorCombination
().
EQ
(
"reshape2"
,
0
))
.
AddCombination
(
paddle
::
framework
::
compatible
::
OpVersionComparatorCombination
().
EQ
(
"unsqueeze2"
,
0
));
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
552ed8d8
...
@@ -507,8 +507,9 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) {
...
@@ -507,8 +507,9 @@ XpuPassStrategy::XpuPassStrategy() : PassStrategy({}) {
"delete_assign_op_pass"
,
"delete_assign_op_pass"
,
"delete_dropout_op_pass"
,
"delete_dropout_op_pass"
,
"delete_concat_op_pass"
,
"delete_concat_op_pass"
,
"identity_op_clean_pass"
,
"delete_repeated_ops_pass"
,
"delete_repeated_ops_pass"
,
"identity_op_clean_pass"
,
"fused_continuous_same_ops_pass"
,
"reshape_unstack_concat_fuse_pass"
,
"reshape_unstack_concat_fuse_pass"
,
"delete_op_device_pass"
,
"delete_op_device_pass"
,
"constant_folding_pass"
,
"constant_folding_pass"
,
...
...
test/ir/inference/test_xpu_delete_repeated_ops_pass.py
浏览文件 @
552ed8d8
...
@@ -13,8 +13,10 @@
...
@@ -13,8 +13,10 @@
# limitations under the License.
# limitations under the License.
import
unittest
import
unittest
from
functools
import
partial
import
hypothesis.strategies
as
st
import
hypothesis.strategies
as
st
import
numpy
as
np
from
auto_scan_test
import
PassAutoScanTest
from
auto_scan_test
import
PassAutoScanTest
from
program_config
import
OpConfig
,
ProgramConfig
,
TensorConfig
from
program_config
import
OpConfig
,
ProgramConfig
,
TensorConfig
...
@@ -380,5 +382,350 @@ class TestDeleteRepeatedScalePass(PassAutoScanTest):
...
@@ -380,5 +382,350 @@ class TestDeleteRepeatedScalePass(PassAutoScanTest):
)
)
class
TestDeleteRepeatedSqueezePass
(
PassAutoScanTest
):
def
sample_predictor_configs
(
self
,
program_config
):
config
=
self
.
create_inference_config
(
use_xpu
=
True
)
yield
config
,
[
'scale'
,
'squeeze2'
,
'relu'
,
'relu'
,
'relu'
],
(
1e-5
,
1e-5
,
)
def
sample_program_config
(
self
,
draw
):
scale_x
=
draw
(
st
.
lists
(
st
.
integers
(
min_value
=
1
,
max_value
=
20
),
min_size
=
2
,
max_size
=
4
)
)
scale_x
[
0
]
=
1
axis
=
0
scale_op0
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale0_out"
]},
)
squeeze_op0
=
OpConfig
(
"squeeze2"
,
inputs
=
{
"X"
:
[
"scale0_out"
],
},
axes
=
[
axis
],
outputs
=
{
"Out"
:
[
"squeeze0_out"
]},
)
relu_op0
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"squeeze0_out"
],
},
outputs
=
{
"Out"
:
[
"relu0_out"
]},
)
scale_op1
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale1_out"
]},
)
squeeze_op1
=
OpConfig
(
"squeeze2"
,
inputs
=
{
"X"
:
[
"scale1_out"
],
},
axes
=
[
axis
],
outputs
=
{
"Out"
:
[
"squeeze1_out"
]},
)
relu_op1
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"squeeze1_out"
],
},
outputs
=
{
"Out"
:
[
"relu1_out"
]},
)
scale_op2
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale2_out"
]},
)
squeeze_op2
=
OpConfig
(
"squeeze2"
,
inputs
=
{
"X"
:
[
"scale2_out"
],
},
axes
=
[
axis
],
outputs
=
{
"Out"
:
[
"squeeze2_out"
]},
)
relu_op2
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"squeeze2_out"
],
},
outputs
=
{
"Out"
:
[
"relu2_out"
]},
)
ops
=
[
scale_op0
,
squeeze_op0
,
relu_op0
,
scale_op1
,
squeeze_op1
,
relu_op1
,
scale_op2
,
squeeze_op2
,
relu_op2
,
]
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"scale_x"
:
TensorConfig
(
shape
=
scale_x
),
},
outputs
=
[
"relu0_out"
,
"relu1_out"
,
"relu2_out"
],
)
return
program_config
class
TestDeleteRepeatedUnSqueezePass
(
PassAutoScanTest
):
def
sample_predictor_configs
(
self
,
program_config
):
config
=
self
.
create_inference_config
(
use_xpu
=
True
)
yield
config
,
[
'scale'
,
'unsqueeze2'
,
'relu'
,
'relu'
,
'relu'
],
(
1e-5
,
1e-5
,
)
def
sample_program_config
(
self
,
draw
):
scale_x
=
draw
(
st
.
lists
(
st
.
integers
(
min_value
=
1
,
max_value
=
20
),
min_size
=
2
,
max_size
=
4
)
)
axis
=
0
scale_op0
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale0_out"
]},
)
unsqueeze_op0
=
OpConfig
(
"unsqueeze2"
,
inputs
=
{
"X"
:
[
"scale0_out"
],
},
axes
=
[
axis
],
outputs
=
{
"Out"
:
[
"unsqueeze0_out"
]},
)
relu_op0
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"unsqueeze0_out"
],
},
outputs
=
{
"Out"
:
[
"relu0_out"
]},
)
scale_op1
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale1_out"
]},
)
unsqueeze_op1
=
OpConfig
(
"unsqueeze2"
,
inputs
=
{
"X"
:
[
"scale1_out"
],
},
axes
=
[
axis
],
outputs
=
{
"Out"
:
[
"unsqueeze1_out"
]},
)
relu_op1
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"unsqueeze1_out"
],
},
outputs
=
{
"Out"
:
[
"relu1_out"
]},
)
scale_op2
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale2_out"
]},
)
unsqueeze_op2
=
OpConfig
(
"unsqueeze2"
,
inputs
=
{
"X"
:
[
"scale2_out"
],
},
axes
=
[
axis
],
outputs
=
{
"Out"
:
[
"unsqueeze2_out"
]},
)
relu_op2
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"unsqueeze2_out"
],
},
outputs
=
{
"Out"
:
[
"relu2_out"
]},
)
ops
=
[
scale_op0
,
unsqueeze_op0
,
relu_op0
,
scale_op1
,
unsqueeze_op1
,
relu_op1
,
scale_op2
,
unsqueeze_op2
,
relu_op2
,
]
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"scale_x"
:
TensorConfig
(
shape
=
scale_x
),
},
outputs
=
[
"relu0_out"
,
"relu1_out"
,
"relu2_out"
],
)
return
program_config
class
TestDeleteRepeatedGatherPass
(
PassAutoScanTest
):
def
sample_predictor_configs
(
self
,
program_config
):
config
=
self
.
create_inference_config
(
use_xpu
=
True
)
yield
config
,
[
'scale'
,
'gather'
,
'relu'
,
'relu'
,
'relu'
],
(
1e-5
,
1e-5
)
def
sample_program_config
(
self
,
draw
):
scale_x
=
draw
(
st
.
lists
(
st
.
integers
(
min_value
=
3
,
max_value
=
20
),
min_size
=
2
,
max_size
=
4
)
)
axis
=
0
def
generate_index
(
*
args
,
**
kwargs
):
return
np
.
array
([
0
]).
astype
(
np
.
int64
)
gather_index
=
np
.
array
([
0
]).
astype
(
np
.
int64
)
scale_op0
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale0_out"
]},
)
gather_op0
=
OpConfig
(
"gather"
,
inputs
=
{
"X"
:
[
"scale0_out"
],
"Index"
:
[
"gather_index"
]},
axis
=
axis
,
outputs
=
{
"Out"
:
[
"gather0_out"
]},
)
relu_op0
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"gather0_out"
],
},
outputs
=
{
"Out"
:
[
"relu0_out"
]},
)
scale_op1
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale1_out"
]},
)
gather_op1
=
OpConfig
(
"gather"
,
inputs
=
{
"X"
:
[
"scale1_out"
],
"Index"
:
[
"gather_index"
]},
axis
=
axis
,
outputs
=
{
"Out"
:
[
"gather1_out"
]},
)
relu_op1
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"gather1_out"
],
},
outputs
=
{
"Out"
:
[
"relu1_out"
]},
)
scale_op2
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale2_out"
]},
)
gather_op2
=
OpConfig
(
"gather"
,
inputs
=
{
"X"
:
[
"scale2_out"
],
"Index"
:
[
"gather_index"
]},
axis
=
axis
,
outputs
=
{
"Out"
:
[
"gather2_out"
]},
)
relu_op2
=
OpConfig
(
"relu"
,
inputs
=
{
"X"
:
[
"gather2_out"
],
},
outputs
=
{
"Out"
:
[
"relu2_out"
]},
)
ops
=
[
scale_op0
,
gather_op0
,
relu_op0
,
scale_op1
,
gather_op1
,
relu_op1
,
scale_op2
,
gather_op2
,
relu_op2
,
]
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"scale_x"
:
TensorConfig
(
shape
=
scale_x
),
"gather_index"
:
TensorConfig
(
data_gen
=
partial
(
generate_index
)),
},
outputs
=
[
"relu0_out"
,
"relu1_out"
,
"relu2_out"
],
)
return
program_config
def
test
(
self
):
self
.
run_and_statis
(
quant
=
False
,
max_examples
=
25
,
passes
=
[
"delete_repeated_ops_pass"
],
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
test/ir/inference/test_xpu_fused_continuous_same_ops_pass.py
0 → 100644
浏览文件 @
552ed8d8
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
hypothesis.strategies
as
st
from
auto_scan_test
import
PassAutoScanTest
from
program_config
import
OpConfig
,
ProgramConfig
,
TensorConfig
class
TestFusedSameUnSqueezePass
(
PassAutoScanTest
):
def
sample_predictor_configs
(
self
,
program_config
):
config
=
self
.
create_inference_config
(
use_xpu
=
True
)
yield
config
,
[
'scale'
,
'unsqueeze2'
],
(
1e-5
,
1e-5
)
def
sample_program_config
(
self
,
draw
):
scale_x
=
draw
(
st
.
lists
(
st
.
integers
(
min_value
=
1
,
max_value
=
20
),
min_size
=
1
,
max_size
=
3
)
)
first_unsqueeze_axis
=
0
second_unsqueeze_axis
=
1
third_unsqueeze_axis
=
2
scale_op0
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale0_out"
]},
)
unsqueeze_op0
=
OpConfig
(
"unsqueeze2"
,
inputs
=
{
"X"
:
[
"scale0_out"
],
},
axes
=
[
first_unsqueeze_axis
],
outputs
=
{
"Out"
:
[
"unsqueeze0_out"
]},
)
unsqueeze_op1
=
OpConfig
(
"unsqueeze2"
,
inputs
=
{
"X"
:
[
"unsqueeze0_out"
],
},
axes
=
[
second_unsqueeze_axis
],
outputs
=
{
"Out"
:
[
"unsqueeze1_out"
]},
)
unsqueeze_op2
=
OpConfig
(
"unsqueeze2"
,
inputs
=
{
"X"
:
[
"unsqueeze1_out"
],
},
axes
=
[
third_unsqueeze_axis
],
outputs
=
{
"Out"
:
[
"unsqueeze2_out"
]},
)
ops
=
[
scale_op0
,
unsqueeze_op0
,
unsqueeze_op1
,
unsqueeze_op2
]
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"scale_x"
:
TensorConfig
(
shape
=
scale_x
),
},
outputs
=
[
"unsqueeze2_out"
],
)
return
program_config
class
TestFusedSameReshapePass
(
PassAutoScanTest
):
def
sample_predictor_configs
(
self
,
program_config
):
config
=
self
.
create_inference_config
(
use_xpu
=
True
)
yield
config
,
[
'scale'
,
'reshape2'
],
(
1e-5
,
1e-5
)
def
sample_program_config
(
self
,
draw
):
scale_x
=
draw
(
st
.
sampled_from
([[
8
,
16
],
[
16
,
32
],
[
64
,
16
],
[
16
,
8
],
[
16
,
16
]])
)
first_reshape_shape
=
[
-
1
,
16
,
4
]
second_reshape_shape
=
[
-
1
,
8
]
scale_op0
=
OpConfig
(
"scale"
,
inputs
=
{
"X"
:
[
"scale_x"
],
},
scale
=
2.0
,
bias
=
1.0
,
bias_after_scale
=
True
,
outputs
=
{
"Out"
:
[
"scale0_out"
]},
)
reshape_op0
=
OpConfig
(
"reshape2"
,
inputs
=
{
"X"
:
[
"scale0_out"
],
},
shape
=
first_reshape_shape
,
outputs
=
{
"Out"
:
[
"reshape0_out"
]},
)
reshape_op1
=
OpConfig
(
"reshape2"
,
inputs
=
{
"X"
:
[
"reshape0_out"
],
},
shape
=
second_reshape_shape
,
outputs
=
{
"Out"
:
[
"reshape1_out"
]},
)
ops
=
[
scale_op0
,
reshape_op0
,
reshape_op1
]
program_config
=
ProgramConfig
(
ops
=
ops
,
weights
=
{},
inputs
=
{
"scale_x"
:
TensorConfig
(
shape
=
scale_x
),
},
outputs
=
[
"reshape1_out"
],
)
return
program_config
def
test
(
self
):
self
.
run_and_statis
(
quant
=
False
,
max_examples
=
25
,
min_success_num
=
5
,
passes
=
[
"fused_continuous_same_ops_pass"
],
)
if
__name__
==
"__main__"
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录