Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
94bacb47
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
94bacb47
编写于
6月 23, 2022
作者:
W
WJJ1995
提交者:
GitHub
6月 23, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[cherry pick][Inference]Enhance gpu multihead matmul v3 fuse pass (#43765)
上级
babba557
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
225 additions
and
1 deletion
+225
-1
paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc
paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc
+2
-1
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+2
-0
python/paddle/fluid/tests/unittests/ir/inference/test_matmul_scale_fuse_pass.py
...sts/unittests/ir/inference/test_matmul_scale_fuse_pass.py
+6
-0
python/paddle/fluid/tests/unittests/ir/inference/test_multihead_matmul_fuse_pass_v3.py
...ttests/ir/inference/test_multihead_matmul_fuse_pass_v3.py
+215
-0
未找到文件。
paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc
浏览文件 @
94bacb47
...
@@ -423,7 +423,8 @@ PDNode* MultiHeadMatmulPattern::operator()() {
...
@@ -423,7 +423,8 @@ PDNode* MultiHeadMatmulPattern::operator()() {
}
}
PDNode
*
MultiHeadMatmulV3Pattern
::
operator
()()
{
PDNode
*
MultiHeadMatmulV3Pattern
::
operator
()()
{
std
::
unordered_set
<
std
::
string
>
matmul_ops
{
"matmul"
,
"matmul_v2"
};
// Add mul op to support huggingface onnx model conversion by x2paddle
std
::
unordered_set
<
std
::
string
>
matmul_ops
{
"mul"
,
"matmul"
,
"matmul_v2"
};
auto
*
input0
=
pattern
->
NewNode
(
input0_repr
());
auto
*
input0
=
pattern
->
NewNode
(
input0_repr
());
input0
->
assert_is_ops_input
(
matmul_ops
);
input0
->
assert_is_ops_input
(
matmul_ops
);
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
94bacb47
...
@@ -145,6 +145,8 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
...
@@ -145,6 +145,8 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"gpu_cpu_flatten2_matmul_fuse_pass"
,
//
"gpu_cpu_flatten2_matmul_fuse_pass"
,
//
"gpu_cpu_map_matmul_v2_to_mul_pass"
,
//
"gpu_cpu_map_matmul_v2_to_mul_pass"
,
//
"gpu_cpu_map_matmul_v2_to_matmul_pass"
,
//
"gpu_cpu_map_matmul_v2_to_matmul_pass"
,
//
"matmul_scale_fuse_pass"
,
//
"multihead_matmul_fuse_pass_v3"
,
//
"gpu_cpu_map_matmul_to_mul_pass"
,
//
"gpu_cpu_map_matmul_to_mul_pass"
,
//
"fc_fuse_pass"
,
//
"fc_fuse_pass"
,
//
"fc_elementwise_layernorm_fuse_pass"
,
//
"fc_elementwise_layernorm_fuse_pass"
,
//
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_matmul_scale_fuse_pass.py
浏览文件 @
94bacb47
...
@@ -43,6 +43,12 @@ class TestMatmulScaleFusePass(PassAutoScanTest):
...
@@ -43,6 +43,12 @@ class TestMatmulScaleFusePass(PassAutoScanTest):
config
=
self
.
create_inference_config
(
use_mkldnn
=
True
)
config
=
self
.
create_inference_config
(
use_mkldnn
=
True
)
yield
config
,
[
"matmul"
,
],
(
1e-5
,
1e-5
)
yield
config
,
[
"matmul"
,
],
(
1e-5
,
1e-5
)
# gpu
config
=
self
.
create_inference_config
(
use_gpu
=
True
)
yield
config
,
[
"matmul"
,
],
(
1e-5
,
1e-5
)
def
sample_program_config
(
self
,
draw
):
def
sample_program_config
(
self
,
draw
):
# 1. Generate shape and attr of matmul
# 1. Generate shape and attr of matmul
x_shape
=
draw
(
x_shape
=
draw
(
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_multihead_matmul_fuse_pass_v3.py
0 → 100644
浏览文件 @
94bacb47
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
auto_scan_test
import
PassAutoScanTest
,
IgnoreReasons
from
program_config
import
TensorConfig
,
ProgramConfig
,
OpConfig
import
numpy
as
np
import
paddle.inference
as
paddle_infer
from
functools
import
partial
from
typing
import
Optional
,
List
,
Callable
,
Dict
,
Any
,
Set
import
unittest
import
hypothesis
from
hypothesis
import
given
,
settings
,
seed
,
example
,
assume
,
reproduce_failure
import
hypothesis.strategies
as
st
class TestMultiheadMatmulFusePass(PassAutoScanTest):
    """Auto-scan test for the ``multihead_matmul_fuse_pass_v3`` IR pass.

    Builds a BERT-style self-attention subgraph out of mul /
    elementwise_add / reshape2 / transpose2 / matmul / softmax ops and
    expects the pass to fuse it into a single ``multihead_matmul`` op
    followed by the final projection ``mul``.
    """

    def sample_predictor_configs(self, program_config):
        # Only the GPU predictor exercises this fuse pass.
        gpu_config = self.create_inference_config(use_gpu=True)
        yield gpu_config, ["multihead_matmul", "mul"], (1e-2, 1e-3)

    def sample_program_config(self, draw):
        # NOTE: `draw` is unused — the subgraph has a fixed shape
        # (batch=1, seq_len=128, hidden=768, heads=12, head_dim=64).
        def generate_mul_input():
            return np.random.random([1, 128, 768]).astype(np.float32) - 0.5

        def generate_elewise_input():
            return np.random.random([1, 12, 128, 128]).astype(np.float32)

        def make_mul(x_name, w_name, out_name):
            # Dense projection: flattens the first two dims of X.
            return OpConfig(
                "mul",
                inputs={"X": [x_name], "Y": [w_name]},
                outputs={"Out": [out_name]},
                x_num_col_dims=2,
                y_num_col_dims=1)

        def make_bias_add(x_name, y_name, out_name):
            return OpConfig(
                "elementwise_add",
                inputs={"X": [x_name], "Y": [y_name]},
                outputs={"Out": [out_name]},
                axis=-1)

        def make_reshape(x_name, out_name, xshape_name, shape):
            return OpConfig(
                "reshape2",
                inputs={"X": [x_name]},
                outputs={"Out": [out_name], "XShape": [xshape_name]},
                shape=shape)

        def make_transpose(x_name, out_name, axis):
            return OpConfig(
                "transpose2",
                inputs={"X": [x_name]},
                outputs={"Out": [out_name]},
                axis=axis)

        # Q/K/V projections all read the same activation "mul_x".
        mul_0 = make_mul("mul_x", "mul_0_w", "mul_0_out")
        mul_1 = make_mul("mul_x", "mul_1_w", "mul_1_out")
        mul_2 = make_mul("mul_x", "mul_2_w", "mul_2_out")

        ele_0 = make_bias_add(mul_0.outputs["Out"][0], "ele_0_w", "ele_0_out")
        ele_1 = make_bias_add(mul_1.outputs["Out"][0], "ele_1_w", "ele_1_out")
        ele_2 = make_bias_add(mul_2.outputs["Out"][0], "ele_2_w", "ele_2_out")

        # Split hidden dim into (heads, head_dim).
        reshape_0 = make_reshape(ele_0.outputs["Out"][0], "reshape_0_out",
                                 "reshape_0_Xout", (1, 128, 12, 64))
        reshape_1 = make_reshape(ele_1.outputs["Out"][0], "reshape_1_out",
                                 "reshape_1_Xout", (1, 128, 12, 64))
        reshape_2 = make_reshape(ele_2.outputs["Out"][0], "reshape_2_out",
                                 "reshape_2_Xout", (1, 128, 12, 64))

        transpose_0 = make_transpose(reshape_0.outputs["Out"][0],
                                     "transpose_0_out", (0, 2, 1, 3))
        # K uses a different permutation so Q*K needs no transpose flag.
        transpose_1 = make_transpose(reshape_1.outputs["Out"][0],
                                     "transpose_1_out", (0, 2, 3, 1))
        transpose_2 = make_transpose(reshape_2.outputs["Out"][0],
                                     "transpose_2_out", (0, 2, 1, 3))

        # Q*K with the attention scale (0.125 == 1/sqrt(64)) folded in.
        matmul_0 = OpConfig(
            "matmul",
            inputs={
                "X": [transpose_0.outputs["Out"][0]],
                "Y": [transpose_1.outputs["Out"][0]]
            },
            outputs={"Out": ["matmul_0_out"]},
            alpha=0.125,
            transpose_X=False,
            transpose_Y=False,
            fused_reshape_Out=[],
            fused_reshape_X=[],
            fused_reshape_Y=[],
            fused_transpose_Out=[],
            fused_transpose_X=[],
            fused_transpose_Y=[])

        # Add the attention mask, then softmax over the last axis.
        ele_3 = make_bias_add(matmul_0.outputs["Out"][0], "eltadd_qk_b_var",
                              "ele_3_out")
        softmax_op = OpConfig(
            "softmax",
            inputs={"X": [ele_3.outputs["Out"][0]]},
            outputs={"Out": ["softmax_out"]},
            axis=3,
            is_test=True)

        # Attention-weighted sum of V.
        matmul_1 = OpConfig(
            "matmul",
            inputs={
                "X": [softmax_op.outputs["Out"][0]],
                "Y": [transpose_2.outputs["Out"][0]]
            },
            outputs={"Out": ["matmul_1_out"]},
            alpha=1.0,
            transpose_X=False,
            transpose_Y=False,
            fused_reshape_Out=[],
            fused_reshape_X=[],
            fused_reshape_Y=[],
            fused_transpose_Out=[],
            fused_transpose_X=[],
            fused_transpose_Y=[])

        # Merge heads back into the hidden dim and apply the output mul.
        transpose_3 = make_transpose(matmul_1.outputs["Out"][0],
                                     "transpose_3_out", (0, 2, 1, 3))
        reshape_3 = make_reshape(transpose_3.outputs["Out"][0],
                                 "reshape_3_out", "reshape_3_Xout",
                                 (1, 128, 768))
        mul_3 = make_mul(reshape_3.outputs["Out"][0], "mul_3_w", "mul_3_out")

        ops = [
            mul_0, mul_1, mul_2, ele_0, ele_1, ele_2, reshape_0, reshape_1,
            reshape_2, transpose_0, transpose_1, transpose_2, matmul_0, ele_3,
            softmax_op, matmul_1, transpose_3, reshape_3, mul_3
        ]
        return ProgramConfig(
            ops=ops,
            inputs={
                "mul_x":
                TensorConfig(data_gen=partial(generate_mul_input)),
                "eltadd_qk_b_var":
                TensorConfig(data_gen=partial(generate_elewise_input))
            },
            weights={
                "mul_0_w": TensorConfig(shape=[768, 768]),
                "mul_1_w": TensorConfig(shape=[768, 768]),
                "mul_2_w": TensorConfig(shape=[768, 768]),
                "mul_3_w": TensorConfig(shape=[768, 768]),
                "ele_0_w": TensorConfig(shape=[768]),
                "ele_1_w": TensorConfig(shape=[768]),
                "ele_2_w": TensorConfig(shape=[768])
            },
            outputs=[ops[-1].outputs["Out"][0]])

    def test(self):
        self.run_and_statis(
            quant=False,
            max_examples=100,
            min_success_num=1,
            passes=["multihead_matmul_fuse_pass_v3"])
# Standard entry point so the test file can be executed directly
# (e.g. `python test_multihead_matmul_fuse_pass_v3.py`).
if __name__ == "__main__":
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录