wmsofts / Paddle (forked from PaddlePaddle / Paddle)
Commit e2cdd4a3 (unverified)
Authored on Mar 13, 2023 by zhupengyang; committed via GitHub on Mar 13, 2023

[xpu] optimize multi_encoder_xpu_fuse_pass performance (#51346)
Parent: e6ca78c2
Showing 3 changed files with 227 additions and 178 deletions (+227 −178)

paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.cc  +22 −175
paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.h   +194 −0
paddle/fluid/framework/ir/xpu/pass_utils.cc                   +11 −3
paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.cc
...
...
@@ -23,26 +23,15 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.h"

#include <string>

#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/ir/xpu/pass_utils.h"
#include "paddle/fluid/framework/ir/xpu/quant_utils.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/phi/kernels/concat_kernel.h"

namespace phi {
class DenseTensor;
}  // namespace phi

namespace paddle {
namespace framework {
class Scope;
}  // namespace framework
}  // namespace paddle

namespace paddle {
namespace framework {
namespace ir {
...
...
@@ -515,175 +504,26 @@ SingleEncoderXPUPattern::SingleEncoderXPUPattern(
}  // namespace patterns
/*
step1: fuse single ops to single_encoder_xpu
step2: fuse mutitl single_encoder_xpu to multi_encoder_xpu
1. step1
Origin subgraph:
------------ input_variable*
| / | \
| / | \
| v_matmul q_matmul k_matmul
| | | |
| | | |
| v_add q_add add
| | | |
| | | |
| v_reshape q_reshape k_reshape
| | | |
| | | |
| v_transpose q_transpose k_transpose
| | | |
| | \ /
| | qk_matmul
| | |
| | |
| | qk_add
| | |
| | |
| | qk_softmax
| | |
| | |
| ---------qkv_matmul_0
| |
| |
| qkv_transpose
| |
| |
| qkv_reshape
| |
| |
| qkv_matmul_1
| |
| |
| qkv_add_0
| |
| |
----------------------qkv_add_1
|
|
layer_norm_1
/ \
| |
| qkv_matmul_2
| |
| |
| qkv_add_2
| |
| |
| qkv_act
| |
| |
| qkv_matmul_3
| |
| |
| qkv_add_3
| |
\ /
qkv_add_4
|
layer_norm
Fused subgraph:
single_encoder_xpu
2. step2
Origin subgraph:
...
|
single_encoder_xpu
|
(single_encoder_xpu)
|
(single_encoder_xpu)
|
...
Fused subgraph:
multi_encoder_xpu
*/
class MultiEncoderXPUFusePass : public FusePassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;

 private:
  int ApplySingleEncoderXPUFuse(ir::Graph* graph,
                                const std::string& act_type,
                                const std::string& matmul_type_0,
                                const std::string& matmul_type_1,
                                const std::string& matmul_type_2,
                                bool norm_before,
                                bool with_q_scale,
                                bool with_mask) const;

  bool ApplyMultiEncoderXPUFuse(ir::Graph* graph) const;

  // Mask must be fp32 even if model is fp16
  int CastMask(ir::Graph* graph) const;

  // 1. Transpose q_w, k_w, v_w
  // 2. Concat q_w, k_w, v_w
  // 3. Generate qkv_w_max tensor
  // 4. Quant qkv_w to int16
  void PrepareQKVWeight(Graph* graph,
                        Scope* scope,
                        BlockDesc* block,
                        Node* q_w,
                        Node* k_w,
                        Node* v_w,
                        Node** qkv_w,
                        Node** qkv_w_max) const;

  // 1. Cast bias to fp32
  // 2. Concat q/k/v bias
  void PrepareQKVBias(Graph* graph,
                      Scope* scope,
                      BlockDesc* block,
                      Node* q_bias,
                      Node* k_bias,
                      Node* v_bias,
                      Node** qkv_bias) const;

  const std::string name_scope_{"multi_encoder_xpu_fuse_pass"};
};
void MultiEncoderXPUFusePass::ApplyImpl(ir::Graph* graph) const {
  PADDLE_ENFORCE_NOT_NULL(
      graph, platform::errors::PreconditionNotMet("graph should not be null."));
  Init(name_scope_, graph);

-  std::vector<std::string> act_types{"gelu", "relu"};
-  std::vector<std::string> matmul_types_0{"matmul_v2", "matmul", "mul"};
-  std::vector<std::string> matmul_types_1{"matmul_v2", "matmul"};
-  std::vector<std::string> matmul_types_2{"matmul_v2", "matmul"};
-  std::vector<bool> norm_befores{true, false};
-  std::vector<bool> with_q_scales{true, false};
-  std::vector<bool> with_masks{true, false};
  int single_encoder_fused_counts = 0;
  int multi_encoder_fused_counts = 0;
-  for (auto act_type : act_types) {
-    for (auto matmul_type_0 : matmul_types_0) {
-      for (auto matmul_type_1 : matmul_types_1) {
-        for (auto matmul_type_2 : matmul_types_2) {
-          for (auto norm_before : norm_befores) {
-            for (auto with_q_scale : with_q_scales) {
-              for (auto with_mask : with_masks) {
-                single_encoder_fused_counts +=
-                    ApplySingleEncoderXPUFuse(graph,
-                                              act_type,
-                                              matmul_type_0,
-                                              matmul_type_1,
-                                              matmul_type_2,
-                                              norm_before,
-                                              with_q_scale,
-                                              with_mask);
-                while (ApplyMultiEncoderXPUFuse(graph)) {
-                  multi_encoder_fused_counts++;
-                }
-              }
-            }
-          }
-        }
-      }
-    }
-  }
+  auto pattern_params = GeneratePatternParams();
+  for (auto pattern_param : pattern_params) {
+    single_encoder_fused_counts +=
+        ApplySingleEncoderXPUFuse(graph,
+                                  pattern_param.act_type,
+                                  pattern_param.matmul_type_0,
+                                  pattern_param.matmul_type_1,
+                                  pattern_param.matmul_type_2,
+                                  pattern_param.norm_before,
+                                  pattern_param.with_q_scale,
+                                  pattern_param.with_mask);
+    while (ApplyMultiEncoderXPUFuse(graph)) {
+      multi_encoder_fused_counts++;
+    }
+  }
  int cast_mask_counts = CastMask(graph);
...
...
@@ -1372,6 +1212,13 @@ int MultiEncoderXPUFusePass::CastMask(ir::Graph* graph) const {
  return cast_counts;
}

+std::vector<PatternParam> MultiEncoderXPUFusePass::GeneratePatternParams()
+    const {
+  return std::vector<PatternParam>{
+      // Params are arranged in alphabetic order
+      {"gelu", "matmul_v2", "matmul", "matmul_v2", false, false, true}};
+}

}  // namespace ir
}  // namespace framework
}  // namespace paddle
...
...
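The performance gain in this commit comes from not sweeping every attribute combination. A minimal standalone sketch (not Paddle code; names mirror the new header but the program itself is illustrative) of why that matters: the old ApplyImpl ran the expensive subgraph matcher once per combination of the seven pattern attributes, while the new GeneratePatternParams() enumerates only the cases that actually occur in real models (a single case in this commit).

// Standalone sketch, not Paddle code: counts how many times the pattern
// matcher would run under the old exhaustive sweep versus the new
// enumerated-cases approach. Struct/field names mirror the new header.
#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct PatternParam {
  std::string act_type, matmul_type_0, matmul_type_1, matmul_type_2;
  bool norm_before, with_q_scale, with_mask;
};

int main() {
  // Old behavior: every combination of the seven attributes is tried.
  const std::size_t old_runs = 2 /*act*/ * 3 /*matmul_0*/ * 2 /*matmul_1*/ *
                               2 /*matmul_2*/ * 2 /*norm_before*/ *
                               2 /*with_q_scale*/ * 2 /*with_mask*/;

  // New behavior: only the combinations listed in GeneratePatternParams().
  std::vector<PatternParam> cases{
      {"gelu", "matmul_v2", "matmul", "matmul_v2", false, false, true}};

  std::printf("pattern-matcher sweeps: old = %zu, new = %zu\n",
              old_runs, cases.size());
  return 0;
}

With these sizes the old sweep ran the matcher 192 times per pass invocation, versus once per provided case now.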
paddle/fluid/framework/ir/xpu/multi_encoder_xpu_fuse_pass.h
0 → 100644
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/pass.h"
namespace phi {
class DenseTensor;
}  // namespace phi

namespace paddle {
namespace framework {
class Scope;
}  // namespace framework
}  // namespace paddle

namespace paddle {
namespace framework {
namespace ir {
/*
step1: fuse single ops to single_encoder_xpu
step2: fuse mutitl single_encoder_xpu to multi_encoder_xpu
1. step1
Origin subgraph:
------------ input_variable*
| / | \
| / | \
| v_matmul q_matmul k_matmul
| | | |
| | | |
| v_add q_add add
| | | |
| | | |
| v_reshape q_reshape k_reshape
| | | |
| | | |
| v_transpose q_transpose k_transpose
| | | |
| | \ /
| | qk_matmul
| | |
| | |
| | qk_add
| | |
| | |
| | qk_softmax
| | |
| | |
| ---------qkv_matmul_0
| |
| |
| qkv_transpose
| |
| |
| qkv_reshape
| |
| |
| qkv_matmul_1
| |
| |
| qkv_add_0
| |
| |
----------------------qkv_add_1
|
|
layer_norm_1
/ \
| |
| qkv_matmul_2
| |
| |
| qkv_add_2
| |
| |
| qkv_act
| |
| |
| qkv_matmul_3
| |
| |
| qkv_add_3
| |
\ /
qkv_add_4
|
layer_norm
Fused subgraph:
single_encoder_xpu
2. step2
Origin subgraph:
...
|
single_encoder_xpu
|
(single_encoder_xpu)
|
(single_encoder_xpu)
|
...
Fused subgraph:
multi_encoder_xpu
*/
struct PatternParam {
  std::string act_type;       // "gelu", "relu"
  std::string matmul_type_0;  // "matmul_v2", "matmul", "mul"
  std::string matmul_type_1;  // "matmul_v2", "matmul"
  std::string matmul_type_2;  // "matmul_v2", "matmul"
  bool norm_before;
  bool with_q_scale;
  bool with_mask;
};
class MultiEncoderXPUFusePass : public FusePassBase {
 protected:
  void ApplyImpl(ir::Graph* graph) const override;

 private:
  int ApplySingleEncoderXPUFuse(ir::Graph* graph,
                                const std::string& act_type,
                                const std::string& matmul_type_0,
                                const std::string& matmul_type_1,
                                const std::string& matmul_type_2,
                                bool norm_before,
                                bool with_q_scale,
                                bool with_mask) const;

  bool ApplyMultiEncoderXPUFuse(ir::Graph* graph) const;

  // Mask must be fp32 even if model is fp16
  int CastMask(ir::Graph* graph) const;

  // 1. Transpose q_w, k_w, v_w
  // 2. Concat q_w, k_w, v_w
  // 3. Generate qkv_w_max tensor
  // 4. Quant qkv_w to int16
  void PrepareQKVWeight(Graph* graph,
                        Scope* scope,
                        BlockDesc* block,
                        Node* q_w,
                        Node* k_w,
                        Node* v_w,
                        Node** qkv_w,
                        Node** qkv_w_max) const;

  // 1. Cast bias to fp32
  // 2. Concat q/k/v bias
  void PrepareQKVBias(Graph* graph,
                      Scope* scope,
                      BlockDesc* block,
                      Node* q_bias,
                      Node* k_bias,
                      Node* v_bias,
                      Node** qkv_bias) const;

  // Iterating all attrs costs too much time.
  // Just provide several cases.
  std::vector<PatternParam> GeneratePatternParams() const;

  const std::string name_scope_{"multi_encoder_xpu_fuse_pass"};
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
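The comments on PrepareQKVWeight summarize what the pass does to attention weights before handing them to the fused op: transpose, concatenate q/k/v, record a qkv_w_max tensor, and quantize to int16. The snippet below is a hedged, standalone sketch of the max-abs int16 quantization step only; all names are illustrative, and the real implementation lives in quant_utils.cc and may differ (e.g. transpose layout, per-channel maxima).

// Standalone sketch, not Paddle code: quantize a weight buffer to int16
// against its max absolute value and return that max, mirroring the
// "qkv_w_max + int16 qkv_w" pair described in the PrepareQKVWeight comments.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

std::vector<int16_t> QuantToInt16(const std::vector<float>& w, float* max_abs) {
  *max_abs = 1e-6f;
  for (float v : w) *max_abs = std::max(*max_abs, std::fabs(v));
  std::vector<int16_t> out(w.size());
  for (std::size_t i = 0; i < w.size(); ++i) {
    out[i] = static_cast<int16_t>(std::lround(w[i] / *max_abs * 32767.0f));
  }
  return out;
}

int main() {
  // Concatenate q/k/v weights along one axis, then quantize the result.
  std::vector<float> q{0.10f, -0.20f}, k{0.05f, 0.40f}, v{-0.80f, 0.25f};
  std::vector<float> qkv;
  qkv.insert(qkv.end(), q.begin(), q.end());
  qkv.insert(qkv.end(), k.begin(), k.end());
  qkv.insert(qkv.end(), v.begin(), v.end());
  float qkv_w_max = 0.f;
  auto qkv_int16 = QuantToInt16(qkv, &qkv_w_max);
  std::printf("qkv_w_max = %.2f, first element = %d\n", qkv_w_max, qkv_int16[0]);
  return 0;
}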
paddle/fluid/framework/ir/xpu/pass_utils.cc
...
...
@@ -105,6 +105,12 @@ size_t HashTensor(const phi::DenseTensor& in) {
template size_t HashTensor<int16_t>(const phi::DenseTensor& in);
template size_t HashTensor<float>(const phi::DenseTensor& in);

+std::string GetPrefixWithoutHash(const std::string& name,
+                                 const phi::DenseTensor& tensor) {
+  std::size_t found = name.find("_#");
+  return found == std::string::npos ? name : name.substr(0, found);
+}

template <typename T>
void PrepareWeight(Graph* graph,
                   Scope* scope,
...
...
@@ -122,8 +128,9 @@ void PrepareWeight(Graph* graph,
  size_t dst_hash = HashTensor<T>(dst_tensor);
  size_t dst_max_hash = HashTensor<float>(dst_max_tensor);
-  std::string dst_name = src_name + "_" + std::to_string(dst_hash);
-  std::string dst_max_name = src_name + "_max_" + std::to_string(dst_max_hash);
+  std::string pre_name = GetPrefixWithoutHash(src_name, *src_tensor);
+  std::string dst_name = pre_name + "_#" + std::to_string(dst_hash);
+  std::string dst_max_name = pre_name + "_max_#" + std::to_string(dst_max_hash);
  *dst = FindNodeWithName(graph, dst_name);
  if (*dst == nullptr) {
    // Create dst node
...
...
@@ -199,7 +206,8 @@ void PrepareBias(
  phi::DenseTensor dst_tensor;
  CastToFp32(src_tensor, &dst_tensor);
  size_t dst_hash = HashTensor<float>(dst_tensor);
-  std::string dst_name = src_name + "_" + std::to_string(dst_hash);
+  std::string pre_name = GetPrefixWithoutHash(src_name, *src_tensor);
+  std::string dst_name = pre_name + "_#" + std::to_string(dst_hash);
  *dst = FindNodeWithName(graph, dst_name);
  if (*dst == nullptr) {
    // Create dst node
...
...
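The pass_utils.cc change renames the tensors that PrepareWeight/PrepareBias create from "<src>_<hash>" to "<prefix>_#<hash>", where the prefix is the source name with any earlier "_#<hash>" suffix stripped by GetPrefixWithoutHash. A plausible reading (my inference, not stated in the diff) is that this keeps generated names stable when an already-prepared weight is processed again, so FindNodeWithName can reuse the existing node instead of the name growing a fresh hash every round. The standalone sketch below demonstrates only the string behavior; the DenseTensor parameter is dropped because the sketch needs just the name.

// Standalone sketch, not Paddle code: shows how the "_#" marker keeps
// generated weight names bounded across repeated Prepare* calls.
#include <cstddef>
#include <cstdio>
#include <string>

// Same string logic as the GetPrefixWithoutHash added in pass_utils.cc,
// minus the (unused-here) DenseTensor parameter.
std::string GetPrefixWithoutHash(const std::string& name) {
  std::size_t found = name.find("_#");
  return found == std::string::npos ? name : name.substr(0, found);
}

// New naming scheme: prefix + "_#" + hash.
std::string NewDstName(const std::string& src_name, std::size_t hash) {
  return GetPrefixWithoutHash(src_name) + "_#" + std::to_string(hash);
}

// Old naming scheme: src name + "_" + hash, with no way to recover the prefix.
std::string OldDstName(const std::string& src_name, std::size_t hash) {
  return src_name + "_" + std::to_string(hash);
}

int main() {
  std::string old_name = "encoder.q_w", new_name = "encoder.q_w";
  for (int i = 0; i < 2; ++i) {            // prepare the "same" weight twice
    old_name = OldDstName(old_name, 111);  // grows: encoder.q_w_111_111
    new_name = NewDstName(new_name, 111);  // stable: encoder.q_w_#111
  }
  std::printf("old: %s\nnew: %s\n", old_name.c_str(), new_name.c_str());
  return 0;
}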