Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
8e3a2940
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8e3a2940
编写于
1月 13, 2021
作者:
C
cc
提交者:
GitHub
1月 13, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
skip quantizing ops in cpu inference (#30342)
* skip quantizing ops in cpu inference, test=develop
上级
ad6fee2f
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
38 additions
and
8 deletions
+38
-8
paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
.../fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
+4
-0
python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
...luid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
+33
-7
python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py
.../fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py
+1
-1
未找到文件。
paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc
浏览文件 @
8e3a2940
...
@@ -42,6 +42,10 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
...
@@ -42,6 +42,10 @@ void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const {
return
;
return
;
}
}
if
(
op
->
Op
()
->
GetAttrIfExists
<
int
>
(
"skip_quant"
)
==
1
)
{
return
;
}
if
(
op
->
Op
()
->
HasAttr
(
"mkldnn_data_type"
)
||
if
(
op
->
Op
()
->
HasAttr
(
"mkldnn_data_type"
)
||
op
->
Op
()
->
HasProtoAttr
(
"mkldnn_data_type"
))
{
op
->
Op
()
->
HasProtoAttr
(
"mkldnn_data_type"
))
{
// use_quantizer is no longer used
// use_quantizer is no longer used
...
...
python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
浏览文件 @
8e3a2940
...
@@ -56,7 +56,8 @@ class Quant2Int8MkldnnPass(object):
...
@@ -56,7 +56,8 @@ class Quant2Int8MkldnnPass(object):
]
]
self
.
_fake_quantize_dequantize_types
=
[
self
.
_fake_quantize_dequantize_types
=
[
'fake_quantize_dequantize_abs_max'
,
'fake_quantize_dequantize_abs_max'
,
'fake_quantize_dequantize_moving_average_abs_max'
'fake_quantize_dequantize_moving_average_abs_max'
,
'fake_channel_wise_quantize_dequantize_abs_max'
]
]
self
.
_ops_to_quantize
=
_ops_to_quantize
self
.
_ops_to_quantize
=
_ops_to_quantize
self
.
_op_ids_to_skip
=
_op_ids_to_skip
if
_op_ids_to_skip
is
not
None
else
set
(
self
.
_op_ids_to_skip
=
_op_ids_to_skip
if
_op_ids_to_skip
is
not
None
else
set
(
...
@@ -71,7 +72,7 @@ class Quant2Int8MkldnnPass(object):
...
@@ -71,7 +72,7 @@ class Quant2Int8MkldnnPass(object):
self
.
_relu_ops
=
[
'relu'
,
'relu6'
]
self
.
_relu_ops
=
[
'relu'
,
'relu6'
]
self
.
_matmul_ops
=
[
'matmul'
]
self
.
_matmul_ops
=
[
'matmul'
]
self
.
_gru_ops
=
[
'fusion_gru'
,
'multi_gru'
]
self
.
_gru_ops
=
[
'fusion_gru'
,
'multi_gru'
]
self
.
_weight_
scale
s
=
{}
self
.
_weight_
threshold
s
=
{}
# Collect the Input and Output scales from Fake quant models
# Collect the Input and Output scales from Fake quant models
self
.
_var_quant_scales
=
{}
self
.
_var_quant_scales
=
{}
self
.
_max_range
=
{}
self
.
_max_range
=
{}
...
@@ -84,7 +85,8 @@ class Quant2Int8MkldnnPass(object):
...
@@ -84,7 +85,8 @@ class Quant2Int8MkldnnPass(object):
IrGraph
),
'graph must be the instance of IrGraph.'
IrGraph
),
'graph must be the instance of IrGraph.'
self
.
_reset_pass_idx_and_group
(
'int8'
)
self
.
_reset_pass_idx_and_group
(
'int8'
)
graph
=
self
.
_gather_weight_scales_from_fake
(
graph
)
graph
=
self
.
_label_skip_quantized_op
(
graph
)
graph
=
self
.
_gather_weight_thresholds_from_fake
(
graph
)
graph
=
self
.
_gather_output_scales_from_attr
(
graph
)
graph
=
self
.
_gather_output_scales_from_attr
(
graph
)
graph
=
self
.
_gather_input_scales_from_fake
(
graph
)
graph
=
self
.
_gather_input_scales_from_fake
(
graph
)
graph
=
self
.
_remove_fake_ops
(
graph
)
graph
=
self
.
_remove_fake_ops
(
graph
)
...
@@ -135,6 +137,30 @@ class Quant2Int8MkldnnPass(object):
...
@@ -135,6 +137,30 @@ class Quant2Int8MkldnnPass(object):
def
_is_fc_quantized
(
self
,
graph
):
def
_is_fc_quantized
(
self
,
graph
):
return
self
.
_is_any_of_op_types_quantized
(
self
.
_fc_ops
,
graph
)
return
self
.
_is_any_of_op_types_quantized
(
self
.
_fc_ops
,
graph
)
def _label_skip_quantized_op(self, graph):
    """
    Label the ops that must be left un-quantized.

    Only ops whose type is listed in `self._conv_ops`, `self._mul_ops` or
    `self._matmul_ops` (conv2d, depthwise_conv2d, mul, matmul according to
    the original note) and that carry no `quantization_type` attr are
    examined. Such an op gets the `skip_quant` attr when at least one op
    producing one of its input vars is not a `fake_quantize_dequantize_*`
    op. cpu_quantize_placement_pass reads that attr to leave the op alone.

    Per the original author's note: ops of static models already carry
    `skip_quant`, so this labelling is only needed for dygraph models,
    whose quantized ops have no `quantization_type` attr.

    An op whose input vars have no producer ops at all is not labelled.

    Args:
        graph: the graph to scan; it is modified in place.

    Returns:
        The same `graph` object.
    """
    candidate_types = self._conv_ops + self._mul_ops + self._matmul_ops
    for node in graph.all_op_nodes():
        # Guard clauses: only un-annotated ops of the target types matter.
        if node.name() not in candidate_types:
            continue
        if node.op().has_attr("quantization_type"):
            continue

        # Names of every op feeding any input var of this op.
        producer_names = [
            producer.name()
            for input_var in node.inputs
            for producer in input_var.inputs
        ]
        fed_by_plain_op = any(
            "fake_quantize_dequantize_" not in producer_name
            for producer_name in producer_names
        )
        if fed_by_plain_op:
            node.op()._set_attr("skip_quant", True)
    return graph
def
_gather_input_scales_from_fake
(
self
,
graph
):
def
_gather_input_scales_from_fake
(
self
,
graph
):
def
_add_scale_for_vars
(
var_names
,
use_unsigned_int
,
lod_tensor
):
def
_add_scale_for_vars
(
var_names
,
use_unsigned_int
,
lod_tensor
):
scales
=
self
.
_var_quant_scales
scales
=
self
.
_var_quant_scales
...
@@ -165,19 +191,19 @@ class Quant2Int8MkldnnPass(object):
...
@@ -165,19 +191,19 @@ class Quant2Int8MkldnnPass(object):
return
graph
return
graph
def
_gather_weight_
scale
s_from_fake
(
self
,
graph
):
def
_gather_weight_
threshold
s_from_fake
(
self
,
graph
):
for
op
in
graph
.
all_op_nodes
():
for
op
in
graph
.
all_op_nodes
():
if
op
.
name
()
in
self
.
_fake_dequantize_types
:
if
op
.
name
()
in
self
.
_fake_dequantize_types
:
input_name
=
op
.
input
(
"X"
)[
0
]
input_name
=
op
.
input
(
"X"
)[
0
]
if
op
.
op
().
has_attr
(
"max_range"
):
if
op
.
op
().
has_attr
(
"max_range"
):
_max_range
=
np
.
array
(
op
.
op
().
attr
(
"max_range"
)).
astype
(
_max_range
=
np
.
array
(
op
.
op
().
attr
(
"max_range"
)).
astype
(
np
.
float64
)
np
.
float64
)
self
.
_weight_
scale
s
[
input_name
]
=
np
.
array
(
self
.
_weight_
threshold
s
[
input_name
]
=
np
.
array
(
self
.
_s8_max
*
self
.
_s8_max
/
self
.
_s8_max
*
self
.
_s8_max
/
_max_range
).
astype
(
np
.
float64
)
_max_range
).
astype
(
np
.
float64
)
else
:
else
:
scale_name
=
op
.
input
(
"Scales"
)[
0
]
scale_name
=
op
.
input
(
"Scales"
)[
0
]
self
.
_weight_
scale
s
[
input_name
]
=
np
.
array
(
self
.
_weight_
threshold
s
[
input_name
]
=
np
.
array
(
self
.
_load_param
(
self
.
_scope
,
scale_name
)).
astype
(
self
.
_load_param
(
self
.
_scope
,
scale_name
)).
astype
(
np
.
float64
)
np
.
float64
)
...
@@ -314,7 +340,7 @@ class Quant2Int8MkldnnPass(object):
...
@@ -314,7 +340,7 @@ class Quant2Int8MkldnnPass(object):
weight_var_name
=
op_node
.
input
(
weight_name
)[
0
]
weight_var_name
=
op_node
.
input
(
weight_name
)[
0
]
output_var_name
=
op_node
.
output
(
output_name
)[
0
]
output_var_name
=
op_node
.
output
(
output_name
)[
0
]
# Convert int8 range weights to fp32 range weights
# Convert int8 range weights to fp32 range weights
scales
=
self
.
_weight_
scale
s
[
output_var_name
]
scales
=
self
.
_weight_
threshold
s
[
output_var_name
]
weight
=
self
.
_load_param
(
self
.
_scope
,
weight_var_name
)
weight
=
self
.
_load_param
(
self
.
_scope
,
weight_var_name
)
if
scales
.
size
==
1
or
scales
.
size
==
weight
.
shape
[
0
]:
if
scales
.
size
==
1
or
scales
.
size
==
weight
.
shape
[
0
]:
w_fp32
=
np
.
multiply
(
np
.
divide
(
weight
,
self
.
_s8_max
).
T
,
scales
.
T
).
T
w_fp32
=
np
.
multiply
(
np
.
divide
(
weight
,
self
.
_s8_max
).
T
,
scales
.
T
).
T
...
...
python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py
浏览文件 @
8e3a2940
...
@@ -180,7 +180,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
...
@@ -180,7 +180,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
_place
=
self
.
place
,
_place
=
self
.
place
,
_core
=
core
,
_core
=
core
,
_debug
=
False
)
_debug
=
False
)
qpass
.
_weight_
scale
s
[
"mul_output"
]
=
self
.
mul_output_scale
qpass
.
_weight_
threshold
s
[
"mul_output"
]
=
self
.
mul_output_scale
param
=
self
.
scope
.
var
(
"mul_weights"
).
get_tensor
()
param
=
self
.
scope
.
var
(
"mul_weights"
).
get_tensor
()
param
.
set
(
self
.
variables_mul
[
"mul_weights"
],
self
.
place
)
param
.
set
(
self
.
variables_mul
[
"mul_weights"
],
self
.
place
)
qpass
.
_dequantize_op_weights
(
graph
,
op_node
,
"Y"
,
"Out"
)
qpass
.
_dequantize_op_weights
(
graph
,
op_node
,
"Y"
,
"Out"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录