Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
2953b708
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2953b708
编写于
10月 31, 2022
作者:
F
feng_shuai
提交者:
GitHub
10月 31, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat: add int8 support for vit (#47330)
* feat: add int8 support for vit * test:add test
上级
34d13d6a
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
68 addition
and
8 deletion
+68
-8
paddle/fluid/framework/ir/vit_attention_fuse_pass.cc
paddle/fluid/framework/ir/vit_attention_fuse_pass.cc
+26
-0
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
...e/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
+31
-7
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py
...ittests/ir/inference/test_trt_convert_multihead_matmul.py
+11
-1
未找到文件。
paddle/fluid/framework/ir/vit_attention_fuse_pass.cc
浏览文件 @
2953b708
...
@@ -56,6 +56,22 @@ namespace paddle {
...
@@ -56,6 +56,22 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
// Finds an attribute of `op_ptr` whose name starts with `regexp`.
//
// Despite the parameter's name, `regexp` is matched as a literal prefix of the
// attribute name, not as a regular expression.
//
// @param op_ptr  Operator description whose attribute map is scanned.
// @param name    Output. Always cleared on entry; on success it receives the
//                full name of the matching attribute.
// @param regexp  Literal prefix to look for (defaults to "Input_scale_").
// @return true if some attribute name begins with `regexp`, false otherwise.
//
// NOTE(review): the original code stored the attributes in a
// std::unordered_map, so when several attribute names share the prefix, which
// one is reported depends on the map's iteration order.
bool HasScale(OpDesc* const op_ptr,
              std::string* name,
              std::string regexp = "Input_scale_") {
  name->clear();
  // Bind by reference: only the keys are inspected, so copying every
  // attribute value into a local map is unnecessary work.
  const auto& attr_map = op_ptr->GetAttrMap();
  for (const auto& attr : attr_map) {
    // compare() tests the prefix in place, avoiding a temporary substring per
    // attribute and the size_t -> int narrowing of the prefix length. A name
    // shorter than the prefix compares unequal, as before.
    if (attr.first.compare(0, regexp.size(), regexp) == 0) {
      *name = attr.first;
      return true;
    }
  }
  return false;
}
void
VitAttentionFusePass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
void
VitAttentionFusePass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
GraphPatternDetector
gpd
;
GraphPatternDetector
gpd
;
const
std
::
string
pattern_name
=
"vit_attention_fuse"
;
const
std
::
string
pattern_name
=
"vit_attention_fuse"
;
...
@@ -103,6 +119,16 @@ void VitAttentionFusePass::ApplyImpl(ir::Graph* graph) const {
...
@@ -103,6 +119,16 @@ void VitAttentionFusePass::ApplyImpl(ir::Graph* graph) const {
float
alpha
=
PADDLE_GET_CONST
(
float
,
scale1_op
->
Op
()
->
GetAttr
(
"scale"
));
float
alpha
=
PADDLE_GET_CONST
(
float
,
scale1_op
->
Op
()
->
GetAttr
(
"scale"
));
desc
.
SetAttr
(
"alpha"
,
alpha
);
desc
.
SetAttr
(
"alpha"
,
alpha
);
// int8 for fc
std
::
string
scale_name
;
if
(
HasScale
(
matmul0_op
->
Op
(),
&
scale_name
))
{
desc
.
SetAttr
(
"Input_scale"
,
matmul0_op
->
Op
()
->
GetAttr
(
scale_name
));
}
if
(
HasScale
(
elementwise0_op
->
Op
(),
&
scale_name
,
"Out"
))
{
desc
.
SetAttr
(
"fc_out_threshold"
,
elementwise0_op
->
Op
()
->
GetAttr
(
scale_name
));
}
// Create a new node for the fused op.
// Create a new node for the fused op.
auto
vit_attention_node
=
graph
->
CreateOpNode
(
&
desc
);
auto
vit_attention_node
=
graph
->
CreateOpNode
(
&
desc
);
...
...
paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
浏览文件 @
2953b708
...
@@ -398,13 +398,37 @@ class MultiheadMatMulOpConverter : public OpConverter {
...
@@ -398,13 +398,37 @@ class MultiheadMatMulOpConverter : public OpConverter {
// add fc layer
// add fc layer
nvinfer1
::
ILayer
*
fc_layer
=
nullptr
;
nvinfer1
::
ILayer
*
fc_layer
=
nullptr
;
fc_layer
=
if
(
op_desc
.
HasAttr
(
"Input_scale"
))
{
TRT_ENGINE_ADD_LAYER
(
engine_
,
engine_
->
SetTensorDynamicRange
(
FullyConnected
,
reshape_before_fc_layer
->
getOutput
(
0
),
in_scale
);
*
reshape_before_fc_layer
->
getOutput
(
0
),
nvinfer1
::
DimsHW
nv_ksize
(
1
,
1
);
n
,
fc_layer
=
weight
,
TRT_ENGINE_ADD_LAYER
(
engine_
,
bias
);
Convolution
,
*
reshape_before_fc_layer
->
getOutput
(
0
),
n
,
nv_ksize
,
weight
,
bias
);
PADDLE_ENFORCE_EQ
(
op_desc
.
HasAttr
(
"fc_out_threshold"
),
true
,
platform
::
errors
::
InvalidArgument
(
"must have out threshold in multihead layers "
"in int8 mode"
));
float
out_scale
=
PADDLE_GET_CONST
(
float
,
op_desc
.
GetAttr
(
"fc_out_threshold"
));
engine_
->
SetTensorDynamicRange
(
fc_layer
->
getOutput
(
0
),
out_scale
);
}
else
{
fc_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
FullyConnected
,
*
reshape_before_fc_layer
->
getOutput
(
0
),
n
,
weight
,
bias
);
}
fc_layer
->
setName
(
(
"multihead_mamul_fc(Output: "
+
output_name
+
")"
).
c_str
());
// add shuffle for CustomQKVToContextPluginDynamic layer
// add shuffle for CustomQKVToContextPluginDynamic layer
auto
*
reshape_after_fc_layer
=
auto
*
reshape_after_fc_layer
=
...
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_multihead_matmul.py
浏览文件 @
2953b708
...
@@ -818,7 +818,11 @@ class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
...
@@ -818,7 +818,11 @@ class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
"Y"
:
[
"matmul1_weight"
],
"Y"
:
[
"matmul1_weight"
],
},
},
"op_outputs"
:
{
"Out"
:
[
"matmul1_output"
]},
"op_outputs"
:
{
"Out"
:
[
"matmul1_output"
]},
"op_attrs"
:
{
"trans_x"
:
False
,
"trans_y"
:
False
},
"op_attrs"
:
{
"trans_x"
:
False
,
"trans_y"
:
False
,
"Input_scale_layer"
:
1.0
,
},
},
},
{
{
"op_type"
:
"elementwise_add"
,
"op_type"
:
"elementwise_add"
,
...
@@ -832,6 +836,7 @@ class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
...
@@ -832,6 +836,7 @@ class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
"Scale_x"
:
1.0
,
"Scale_x"
:
1.0
,
"Scale_y"
:
1.0
,
"Scale_y"
:
1.0
,
"axis"
:
2
,
"axis"
:
2
,
"Out"
:
1.0
,
},
},
},
},
{
{
...
@@ -1035,6 +1040,11 @@ class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
...
@@ -1035,6 +1040,11 @@ class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
# for dynamic_shape
# for dynamic_shape
generate_dynamic_shape
(
attrs
)
generate_dynamic_shape
(
attrs
)
self
.
trt_param
.
workspace_size
=
2013265920
self
.
trt_param
.
workspace_size
=
2013265920
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Int8
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(),
(
1e-3
,
1e-3
,
)
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
self
.
trt_param
.
precision
=
paddle_infer
.
PrecisionType
.
Half
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(),
(
yield
self
.
create_inference_config
(),
generate_trt_nodes_num
(),
(
1e-3
,
1e-3
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录