BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 7eef05c2 (unverified)
Authored by Guanghua Yu on Oct 17, 2022; committed via GitHub on Oct 17, 2022.
[cherry-pick] Fix the bug of exporting model in dygraph QAT (#47028)
* fix dygraph new format quant
* fix unittest
* fix conflict
Parent commit: 8c6c79ac
Showing 4 changed files with 37 additions and 27 deletions.
python/paddle/fluid/contrib/slim/quantization/imperative/qat.py      +30  -23
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py    +1   -0
python/paddle/fluid/contrib/slim/quantization/utils.py                +3   -1
python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py         +3   -3
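
At the API level, this commit moves the onnx_format switch from save_quantized_model into the ImperativeQuantAware constructor (and ImperativeQuantizeOutputs). Below is a minimal usage sketch of the updated interface; the toy network, quantize-type values and save path are illustrative placeholders, not taken from this commit.

# Minimal usage sketch (not from this commit): onnx_format is now passed to
# the ImperativeQuantAware constructor instead of save_quantized_model.
import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

# Placeholder network; any dygraph model with Conv2D/Linear layers would do.
model = paddle.nn.Sequential(
    paddle.nn.Conv2D(1, 6, 3, padding=1), paddle.nn.ReLU(),
    paddle.nn.Flatten(), paddle.nn.Linear(6 * 28 * 28, 10))

imperative_qat = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max',
    onnx_format=True)  # previously an argument of save_quantized_model
imperative_qat.quantize(model)

# ... quantization-aware training loop goes here ...

imperative_qat.save_quantized_model(
    model,
    path='./quantized_model/lenet',
    input_spec=[
        paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
    ])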
python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
@@ -72,7 +72,8 @@ class ImperativeQuantAware(object):
                  weight_preprocess_layer=None,
                  act_preprocess_layer=None,
                  weight_quantize_layer=None,
-                 act_quantize_layer=None):
+                 act_quantize_layer=None,
+                 onnx_format=False):
         """
         The constructor for ImperativeQuantAware.

@@ -124,6 +125,8 @@ class ImperativeQuantAware(object):
                 activation and returns dequantized activation.
                 If None, will use quantization op defined by 'activation_quantize_type'.
                 Default is None.
+            onnx_format (bool, optional): Whether to export the quantized model
+                with format of ONNX. Default is False.

         Note:
             If user sets attribute 'skip_quant' to a Layer that support dynamic

@@ -224,7 +227,7 @@ class ImperativeQuantAware(object):
         self._quantize_inputs = ImperativeQuantizeInputs(**kwargs)
-        self._quantize_outputs = ImperativeQuantizeOutputs(moving_rate,
-                                                           activation_bits)
+        self._quantize_outputs = ImperativeQuantizeOutputs(
+            moving_rate, activation_bits, onnx_format)

     def quantize(self, model):
         """

@@ -413,7 +416,7 @@ class ImperativeQuantizeOutputs(object):
     Calculate the output scales for target layers.
     """

-    def __init__(self, moving_rate=0.9, activation_bits=8):
+    def __init__(self, moving_rate=0.9, activation_bits=8, onnx_format=False):
         """
         The constructor for ImperativeQuantizeOutputs.

@@ -425,6 +428,7 @@ class ImperativeQuantizeOutputs(object):
         super(ImperativeQuantizeOutputs, self).__init__()
         self._moving_rate = moving_rate
         self._activation_bits = activation_bits
+        self._onnx_format = onnx_format

     def apply(self, model):
         """

@@ -461,12 +465,7 @@ class ImperativeQuantizeOutputs(object):
             setattr(parent_layer, sub_name, cur_quant_layer)

-    def save_quantized_model(self,
-                             model,
-                             path,
-                             input_spec=None,
-                             onnx_format=False,
-                             **config):
+    def save_quantized_model(self, model, path, input_spec=None, **config):
         """
         Save the quantized model for the inference.

@@ -479,8 +478,6 @@ class ImperativeQuantizeOutputs(object):
                 InputSpec or example Tensor. If None, all input variables of
                 the original Layer's forward method would be the inputs of
                 the saved model. Default None.
-            onnx_format (bool, optional): Whether to export the quantized model
-                with format of ONNX. Default is False.
             **config (dict, optional): Other save configuration options for
                 compatibility. We do not recommend using these configurations,
                 they may be removed in the future. If not necessary, DO NOT use

@@ -521,7 +518,7 @@ class ImperativeQuantizeOutputs(object):
                 model_filename=model_filename,
                 params_filename=params_filename))

-        if not onnx_format:
+        if not self._onnx_format:
             self._gather_scales(infer_program, scope, fetch_targets)

             # Remove `moving_average_abs_max_scale` node in sub graphs.

@@ -540,10 +537,14 @@ class ImperativeQuantizeOutputs(object):
             graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
             transform_pass = ReplaceFakeQuantDequantPass(
                 scope, place, quant_bits=self._activation_bits)
-            transform_pass.apply(graph)
+            for sub_graph in graph.all_sub_graphs():
+                sub_graph._for_test = True
+                transform_pass.apply(sub_graph)

             quant_weight_pass = QuantWeightPass(scope, place)
-            quant_weight_pass.apply(graph)
+            for sub_graph in graph.all_sub_graphs():
+                sub_graph._for_test = True
+                quant_weight_pass.apply(sub_graph)

             infer_program = graph.to_program()

@@ -565,18 +566,24 @@ class ImperativeQuantizeOutputs(object):
         """
         Whether the layer needs to calculate output scales.
         """
-        flag = False
-        if isinstance(layer, dygraph.Layer):
-            # exclude fake_quant ops in quant_layers file
-            if utils.is_leaf_layer(layer) and \
-                not isinstance(layer, tuple(utils.fake_quant_leaf_layers)):
-                flag = True
+        # exclude fake_quant ops in quant_layers file
+        if not isinstance(layer, dygraph.Layer):
+            return False

-            if isinstance(layer, tuple(utils.fake_quant_wrap_layers)):
-                flag = True
+        if self._onnx_format:
+            return True if isinstance(
+                layer, tuple(utils.fake_quant_wrap_layers)) else False

-            if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer):
-                flag = True
+        flag = False
+        if utils.is_leaf_layer(layer) and \
+                not isinstance(layer, tuple(utils.fake_quant_leaf_layers)):
+            flag = True
+
+        if isinstance(layer, tuple(utils.fake_quant_wrap_layers)):
+            flag = True
+
+        if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer):
+            flag = True

         return flag
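
The @@ -540 hunk above changes the ONNX-format export path so that ReplaceFakeQuantDequantPass and QuantWeightPass run over every sub-graph (with the test flag set) instead of only the top-level graph. A condensed sketch of that pattern follows; it assumes infer_program, scope and place already exist (e.g. from a loaded inference model), uses quant_bits=8 in place of self._activation_bits, and is a reading aid rather than the library code.

# Condensed sketch of the sub-graph pass loop from the hunk above.
# Assumes infer_program, scope and place already exist; quant_bits=8
# stands in for self._activation_bits.
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization.quantization_pass import (
    ReplaceFakeQuantDequantPass, QuantWeightPass)

graph = IrGraph(core.Graph(infer_program.desc), for_test=False)

passes = [
    ReplaceFakeQuantDequantPass(scope, place, quant_bits=8),
    QuantWeightPass(scope, place),
]
for ir_pass in passes:
    # Control-flow blocks live in sub-graphs, so each pass visits all of
    # them with the test flag set, mirroring the change above.
    for sub_graph in graph.all_sub_graphs():
        sub_graph._for_test = True
        ir_pass.apply(sub_graph)

infer_program = graph.to_program()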
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -59,6 +59,7 @@ _fake_dequant_op_list = [
 _fake_quant_dequant_op_list = [
     'fake_quantize_dequantize_moving_average_abs_max',
     "fake_channel_wise_quantize_dequantize_abs_max",
+    "fake_quantize_dequantize_abs_max",
 ]

 _conv_ops = ['conv2d', 'depthwise_conv2d', 'conv2d_transpose']
python/paddle/fluid/contrib/slim/quantization/utils.py
@@ -332,9 +332,11 @@ def quant_tensor(x, scale, quant_axis=0, weight_bits=8, onnx_format=False):
         x[x < -scale] = -scale
         return x

-    assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
     bnt = (1 << (weight_bits - 1)) - 1
+    if isinstance(scale, list) and len(scale) == 1:
+        scale = scale[0]
     if isinstance(scale, list):
+        assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
         for i, s in enumerate(scale):
             if s == 0.0:
                 s = 1e-8
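
The effect of the new guard in quant_tensor is that a one-element scale list is unwrapped and handled along the per-tensor path, while the quant_axis assertion now applies only to genuinely per-channel scale lists. A standalone sketch of that normalization (illustrative only, not the library's quant_tensor):

# Illustrative sketch of the scale handling added to quant_tensor above;
# it mirrors the new guard but is not the library implementation.
def normalize_scale(scale, quant_axis=0):
    # A single-element list now behaves like a scalar (per-tensor) scale.
    if isinstance(scale, list) and len(scale) == 1:
        return scale[0]
    if isinstance(scale, list):
        # Per-channel scales still require a valid quantization axis.
        assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
    return scale


print(normalize_scale([0.5]))        # -> 0.5          (per-tensor path)
print(normalize_scale([0.5, 0.25]))  # -> [0.5, 0.25]  (per-channel path)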
python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py
@@ -68,7 +68,8 @@ class TestImperativeQat(unittest.TestCase):
         imperative_qat = ImperativeQuantAware(
             weight_quantize_type=self.weight_quantize_type,
             activation_quantize_type=self.activation_quantize_type,
-            fuse_conv_bn=self.fuse_conv_bn)
+            fuse_conv_bn=self.fuse_conv_bn,
+            onnx_format=self.onnx_format)

         with fluid.dygraph.guard():
             # For CI coverage

@@ -187,8 +188,7 @@ class TestImperativeQat(unittest.TestCase):
                 input_spec=[
                     paddle.static.InputSpec(
                         shape=[None, 1, 28, 28], dtype='float32')
-                ],
-                onnx_format=self.onnx_format)
+                ])
             print('Quantized model saved in %s' % tmpdir)

             if core.is_compiled_with_cuda():