Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a59f215d
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a59f215d
编写于
7月 28, 2021
作者:
W
Wangzheee
提交者:
GitHub
7月 28, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add quant_dequant_matmul (#34359)
上级
68b4a2c3
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
33 addition
and
9 deletion
+33
-9
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
+33
-9
未找到文件。
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
浏览文件 @
a59f215d
...
...
@@ -31,6 +31,7 @@ QuantDequantFusePass::QuantDequantFusePass() {
.
End
()
.
AddInput
(
"Iter"
)
.
IsTensor
()
.
IsOptional
()
.
End
()
.
AddOutput
(
"Out"
)
.
IsTensor
()
...
...
@@ -40,6 +41,7 @@ QuantDequantFusePass::QuantDequantFusePass() {
.
End
()
.
AddOutput
(
"OutScales"
)
.
IsTensor
()
.
IsOptional
()
.
End
()
.
AddAttr
(
"window_size"
)
.
IsType
<
int
>
()
...
...
@@ -167,6 +169,26 @@ QuantDequantFusePass::QuantDequantFusePass() {
.
AddAttr
(
"y_num_col_dims"
)
.
IsNumEQ
(
1
)
.
End
();
AddOpCompat
(
OpCompat
(
"matmul"
))
.
AddInput
(
"X"
)
.
IsTensor
()
.
End
()
.
AddInput
(
"Y"
)
.
IsTensor
()
.
End
()
.
AddOutput
(
"Out"
)
.
IsTensor
()
.
End
()
.
AddAttr
(
"alpha"
)
.
IsNumGE
(
0.99
f
)
.
IsNumLE
(
1.01
f
)
.
End
()
.
AddAttr
(
"transpose_X"
)
.
IsBoolEQ
(
false
)
.
End
()
.
AddAttr
(
"transpose_Y"
)
.
IsBoolEQ
(
false
)
.
End
();
AddOpCompat
(
OpCompat
(
"fc"
))
.
AddInput
(
"Input"
)
.
IsTensor
()
...
...
@@ -291,7 +313,7 @@ void QuantDequantFusePass::DeleteQuant(ir::Graph* graph, Scope* scope,
quantized_op_type
==
"fc"
||
quantized_op_type
==
"conv2d_transpose"
)
{
op_desc
->
SetAttr
(
"Input_scale"
,
scale_value
);
}
else
if
(
quantized_op_type
==
"mul"
)
{
}
else
if
(
quantized_op_type
==
"mul"
||
quantized_op_type
==
"matmul"
)
{
op_desc
->
SetAttr
(
"X_scale"
,
scale_value
);
}
else
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
...
...
@@ -323,7 +345,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph, Scope* scope,
quantized_op_type
==
"conv2d_transpose"
)
{
weight_name
=
"Filter"
;
input_name
=
"Input"
;
}
else
if
(
quantized_op_type
==
"mul"
)
{
}
else
if
(
quantized_op_type
==
"mul"
||
quantized_op_type
==
"matmul"
)
{
weight_name
=
"Y"
;
input_name
=
"X"
;
}
else
if
(
quantized_op_type
==
"fc"
)
{
...
...
@@ -332,7 +354,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph, Scope* scope,
}
else
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"QuantDequantFuse: We only support conv2d, conv2d_fusion, "
"conv2d_transpose, fc, mul for "
"conv2d_transpose, fc, mul
, matmul
for "
"now."
));
}
const
std
::
string
pattern_name
=
"dequant_fuse"
;
...
...
@@ -410,12 +432,13 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph, Scope* scope,
// If quantized op is fc, weight scale size = 1;
// If quantized op is conv2d, weight scale size = weight dims[0]
// If quantized op is conv2d_transpose, weight scale size = weight dims[1]
if
(
quantized_op_type
==
"mul"
||
quantized_op_type
==
"fc"
)
{
if
(
quantized_op_type
==
"mul"
||
quantized_op_type
==
"matmul"
||
quantized_op_type
==
"fc"
)
{
if
(
dequant_type
==
"fake_dequantize_max_abs"
)
{
PADDLE_ENFORCE_EQ
(
weight_scale
.
size
(),
1
,
platform
::
errors
::
InvalidArgument
(
"mul op weight dequantized by [fake_dequantize_max_abs] "
"mul
/matmul
op weight dequantized by [fake_dequantize_max_abs] "
"requires weight scale size = 1, but got %d."
,
weight_scale
.
size
()));
for
(
int
j
=
0
;
j
<
weight_tensor
->
numel
();
j
++
)
{
...
...
@@ -426,9 +449,10 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph, Scope* scope,
PADDLE_ENFORCE_EQ
(
weight_scale
.
size
(),
static_cast
<
size_t
>
(
w_dims
[
1
]),
platform
::
errors
::
InvalidArgument
(
"mul op weight dequantized by "
"mul
/matmul
op weight dequantized by "
"[fake_channel_wise_dequantize_max_abs] requires weight scale "
"size = 2nd dim of mul's weight, which is %d, but got %d."
,
"size = 2nd dim of mul/matmul's weight, which is %d, but got "
"%d."
,
static_cast
<
size_t
>
(
w_dims
[
1
]),
weight_scale
.
size
()));
for
(
int
j
=
0
;
j
<
weight_tensor
->
numel
();
j
++
)
{
quantized_weight_data
[
j
]
*=
weight_scale
[
j
%
w_dims
[
1
]];
...
...
@@ -493,7 +517,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph, Scope* scope,
}
else
if
(
quantized_op_type
==
"fc"
)
{
new_op_desc
.
SetInput
(
"Input"
,
{
new_input
});
new_op_desc
.
SetOutput
(
"Out"
,
{
new_output
});
}
else
if
(
quantized_op_type
==
"mul"
)
{
}
else
if
(
quantized_op_type
==
"mul"
||
quantized_op_type
==
"matmul"
)
{
new_op_desc
.
SetInput
(
"X"
,
{
new_input
});
new_op_desc
.
SetOutput
(
"Out"
,
{
new_output
});
}
...
...
@@ -520,7 +544,7 @@ void QuantDequantFusePass::ApplyImpl(ir::Graph* graph) const {
std
::
unordered_set
<
std
::
string
>
quant_types
=
{
"fake_quantize_range_abs_max"
,
"fake_quantize_moving_average_abs_max"
};
std
::
unordered_set
<
std
::
string
>
quantized_op_types
=
{
"conv2d"
,
"mul"
,
"depthwise_conv2d"
,
"fc"
,
"conv2d_transpose"
};
"conv2d"
,
"mul"
,
"
matmul"
,
"
depthwise_conv2d"
,
"fc"
,
"conv2d_transpose"
};
auto
*
scope
=
param_scope
();
for
(
auto
&
quant_type
:
quant_types
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录