Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
439b2b94
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
439b2b94
编写于
12月 15, 2022
作者:
W
Wangzheee
提交者:
GitHub
12月 15, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix embedding multihead (#49085)
上级
e577040e
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
14 addition
and
9 deletion
+14
-9
paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
...fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
...e/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
+10
-5
paddle/fluid/inference/tensorrt/plugin/many_emb_layernorm_varseqlen_plugin.cu
...ce/tensorrt/plugin/many_emb_layernorm_varseqlen_plugin.cu
+3
-3
未找到文件。
paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
浏览文件 @
439b2b94
...
...
@@ -181,7 +181,7 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
layer
=
plugin_layer
;
auto
output_name
=
op_desc
.
Output
(
"Out"
)[
0
];
RreplenishLayerAndOutput
(
layer
,
"ManyEmbLayerNorm
PluginDynamic_
V1"
,
"ManyEmbLayerNorm
VarlenPluginDynamic
V1"
,
{
output_name
,
std
::
string
(
"qkv_plugin_mask"
),
std
::
string
(
"max_seqlen_tensor"
)},
...
...
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
浏览文件 @
439b2b94
...
...
@@ -257,7 +257,8 @@ class MultiheadMatMulOpConverter : public OpConverter {
max_seqlen_tensor
);
// max_seqlen, eval_placeholder_3
auto
plugin_layer
=
engine_
->
network
()
->
addPluginV2
(
plugin_inputs
.
data
(),
plugin_inputs
.
size
(),
*
plugin
);
layer
=
plugin_layer
;
RreplenishLayerAndOutput
(
plugin_layer
,
"multihead_matmul"
,
{
output_name
},
test_mode
);
}
else
{
int
head_size
=
hidden_out
/
head_number
;
// [3, head_number, head_size, hidden_in] -> [head_number, 3,
...
...
@@ -381,7 +382,8 @@ class MultiheadMatMulOpConverter : public OpConverter {
auto
plugin_layer
=
engine_
->
network
()
->
addPluginV2
(
plugin_inputs
.
data
(),
plugin_inputs
.
size
(),
*
plugin
);
plugin_layer
->
setName
(
(
"CustomQKVToContextPluginDynamic: "
+
output_name
).
c_str
());
// recover no_varlen output
if
(
!
flag_varseqlen
)
{
std
::
vector
<
nvinfer1
::
ITensor
*>
output_transformer
;
...
...
@@ -394,7 +396,10 @@ class MultiheadMatMulOpConverter : public OpConverter {
engine_
->
AddDynamicPlugin
(
output_transformer
.
data
(),
output_transformer
.
size
(),
plugin
);
layer
=
transformer_output_layer
;
engine_
->
SetITensor
(
output_name
,
transformer_output_layer
->
getOutput
(
0
));
}
else
{
engine_
->
SetITensor
(
output_name
,
plugin_layer
->
getOutput
(
0
));
}
}
}
else
{
...
...
@@ -776,6 +781,8 @@ class MultiheadMatMulOpConverter : public OpConverter {
new
plugin
::
QkvToContextPluginDynamic
(
hidden_in
,
head_number
,
head_size
,
scale
,
with_fp16
);
layer
=
engine_
->
AddDynamicPlugin
(
plugin_inputs
.
data
(),
2
,
plugin
);
RreplenishLayerAndOutput
(
layer
,
"multihead_matmul"
,
{
output_name
},
test_mode
);
}
}
}
else
{
...
...
@@ -785,8 +792,6 @@ class MultiheadMatMulOpConverter : public OpConverter {
"You can use the config.SetTRTDynamicShapeInfo(...) interface to set "
"the shape information to run the dynamic shape mode."
));
}
RreplenishLayerAndOutput
(
layer
,
"multihead_matmul"
,
{
output_name
},
test_mode
);
}
};
...
...
paddle/fluid/inference/tensorrt/plugin/many_emb_layernorm_varseqlen_plugin.cu
浏览文件 @
439b2b94
...
...
@@ -255,7 +255,7 @@ bool EmbLayerNormVarSeqlenPluginBase::supportsFormatCombination(
desc
.
dims
.
d
[
0
]
==
prev
.
dims
.
d
[
0
];
}
if
(
pos
==
nbInputs
-
1
)
{
// mask id
return
desc
.
type
==
prev
.
t
ype
;
return
desc
.
type
==
mT
ype
;
}
// embedded sequence
if
(
pos
==
nbInputs
)
{
...
...
@@ -265,11 +265,11 @@ bool EmbLayerNormVarSeqlenPluginBase::supportsFormatCombination(
}
// mask(HFace) or pre_layernorm_bias(MTron)
if
(
pos
==
nbInputs
+
1
)
{
return
desc
.
type
==
prev
.
t
ype
;
return
desc
.
type
==
mT
ype
;
}
// max seqlen
if
(
pos
==
nbInputs
+
2
)
{
return
desc
.
type
==
prev
.
t
ype
;
return
desc
.
type
==
mT
ype
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录