Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
12b9b03e
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
12b9b03e
编写于
10月 31, 2022
作者:
G
Guanghua Yu
提交者:
GitHub
10月 31, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[cherry-pick] update dygraph PTQ export_model api (#47415)
* update dygraph PTQ export_model api * remove postprocess
上级
df64e790
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
132 addition
and
81 deletion
+132
-81
python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
.../paddle/fluid/contrib/slim/quantization/imperative/ptq.py
+109
-69
python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py
...luid/contrib/slim/quantization/imperative/ptq_registry.py
+23
-12
未找到文件。
python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
浏览文件 @
12b9b03e
...
...
@@ -31,9 +31,9 @@ from .ptq_registry import PTQRegistry
__all__
=
[
'ImperativePTQ'
]
_logger
=
get_logger
(
__name__
,
logging
.
INFO
,
fmt
=
'%(asctime)s-%(levelname)s: %(message)s'
)
_logger
=
get_logger
(
__name__
,
logging
.
INFO
,
fmt
=
'%(asctime)s-%(levelname)s: %(message)s'
)
class
ImperativePTQ
(
object
):
...
...
@@ -75,17 +75,20 @@ class ImperativePTQ(object):
Return
quantized_model(paddle.nn.Layer): The quantized model.
"""
assert
isinstance
(
model
,
paddle
.
nn
.
Layer
),
\
"The model must be the instance of paddle.nn.Layer."
assert
isinstance
(
model
,
paddle
.
nn
.
Layer
),
"The model must be the instance of paddle.nn.Layer."
if
not
inplace
:
model
=
copy
.
deepcopy
(
model
)
if
fuse
:
model
.
eval
()
model
=
fuse_utils
.
fuse_layers
(
model
,
fuse_list
)
for
name
,
layer
in
model
.
named_sublayers
():
if
PTQRegistry
.
is_supported_layer
(
layer
)
\
and
utils
.
is_leaf_layer
(
layer
)
\
and
not
self
.
_is_skip_layer
(
layer
):
if
(
PTQRegistry
.
is_supported_layer
(
layer
)
and
utils
.
is_leaf_layer
(
layer
)
and
not
self
.
_is_skip_layer
(
layer
)
):
# Add quant config
quant_config
=
copy
.
deepcopy
(
self
.
_quant_config
)
...
...
@@ -98,7 +101,8 @@ class ImperativePTQ(object):
quant_hook_handle
=
layer
.
register_forward_post_hook
(
hook
)
quant_config
.
quant_hook_handle
=
quant_hook_handle
layer
.
_forward_post_hooks
.
move_to_end
(
quant_hook_handle
.
_hook_id
,
last
=
False
)
quant_hook_handle
.
_hook_id
,
last
=
False
)
return
model
...
...
@@ -117,7 +121,7 @@ class ImperativePTQ(object):
InputSpec or example Tensor. If None, all input variables of
the original Layer's forward method would be the inputs of
the saved model. Default None.
**config
s
(dict, optional): Other save configuration options for
**config (dict, optional): Other save configuration options for
compatibility. We do not recommend using these configurations,
they may be removed in the future. If not necessary, DO NOT use
them. Default None.
...
...
@@ -133,8 +137,9 @@ class ImperativePTQ(object):
None
"""
assert
isinstance
(
model
,
paddle
.
nn
.
Layer
),
\
"The model must be the instance of paddle.nn.Layer."
assert
isinstance
(
model
,
paddle
.
nn
.
Layer
),
"The model must be the instance of paddle.nn.Layer."
# Convert and save dygraph quantized model
self
.
_convert
(
model
)
...
...
@@ -156,12 +161,16 @@ class ImperativePTQ(object):
model_filename
=
basename
+
INFER_MODEL_SUFFIX
params_filename
=
basename
+
INFER_PARAMS_SUFFIX
[
infer_program
,
feed_target_names
,
fetch_targets
]
=
(
paddle
.
fluid
.
io
.
load_inference_model
(
[
infer_program
,
feed_target_names
,
fetch_targets
,
]
=
paddle
.
fluid
.
io
.
load_inference_model
(
dirname
=
dirname
,
executor
=
exe
,
model_filename
=
model_filename
,
params_filename
=
params_filename
))
params_filename
=
params_filename
,
)
# Process inference program
self
.
_clean_up
(
infer_program
)
...
...
@@ -169,13 +178,15 @@ class ImperativePTQ(object):
self
.
_remove_scale_op
(
infer_program
)
# Save final program
paddle
.
fluid
.
io
.
save_inference_model
(
dirname
=
dirname
,
paddle
.
fluid
.
io
.
save_inference_model
(
dirname
=
dirname
,
feeded_var_names
=
feed_target_names
,
target_vars
=
fetch_targets
,
executor
=
exe
,
main_program
=
infer_program
.
clone
(),
model_filename
=
model_filename
,
params_filename
=
params_filename
)
params_filename
=
params_filename
,
)
if
is_dynamic_mode
:
paddle
.
disable_static
()
...
...
@@ -213,8 +224,9 @@ class ImperativePTQ(object):
Returns:
None
"""
assert
isinstance
(
model
,
paddle
.
nn
.
Layer
),
\
"The input model must be the instance of paddle.nn.Layer."
assert
isinstance
(
model
,
paddle
.
nn
.
Layer
),
"The input model must be the instance of paddle.nn.Layer."
total_num
=
0
cur_num
=
0
...
...
@@ -226,8 +238,9 @@ class ImperativePTQ(object):
if
self
.
_is_quant_layer
(
sub_layer
):
cur_num
+=
1
if
cur_num
%
5
==
0
:
_logger
.
info
(
"Process the %s / %s layer"
%
(
cur_num
,
total_num
))
_logger
.
info
(
"Process the %s / %s layer"
%
(
cur_num
,
total_num
)
)
quant_config
=
sub_layer
.
_quant_config
...
...
@@ -236,7 +249,7 @@ class ImperativePTQ(object):
quant_config
.
out_act_quantizer
.
cal_thresholds
()
if
PTQRegistry
.
is_simulated_quant_layer
(
sub_layer
):
weights
=
(
sub_layer
.
weight
,
)
weights
=
(
sub_layer
.
weight
,)
quant_config
.
wt_quantizer
.
sample_data
(
sub_layer
,
weights
)
quant_config
.
wt_quantizer
.
cal_thresholds
()
...
...
@@ -250,18 +263,25 @@ class ImperativePTQ(object):
Returns:
None
"""
assert
isinstance
(
sub_layer
,
paddle
.
nn
.
Layer
),
\
"The input model must be the instance of paddle.nn.Layer."
assert
isinstance
(
sub_layer
,
paddle
.
nn
.
Layer
),
"The input model must be the instance of paddle.nn.Layer."
layer_info
=
PTQRegistry
.
layer_info
(
sub_layer
)
output_names
=
layer_info
.
output_names
output_thresholds
=
quant_config
.
out_act_quantizer
.
thresholds
assert
len
(
output_names
)
==
1
assert
len
(
output_thresholds
)
==
1
if
len
(
output_thresholds
)
==
1
:
save_name
=
output_names
[
0
]
+
str
(
0
)
+
"_threshold"
sub_layer
.
_set_op_attrs
({
save_name
:
output_thresholds
[
0
]})
sub_layer
.
_set_op_attrs
({
"out_threshold"
:
output_thresholds
[
0
]})
else
:
_logger
.
warning
(
"output_thresholds shape of {} need to be 1, but received {}"
.
format
(
output_names
[
0
],
len
(
output_thresholds
)
)
)
def
_wrap_simulated_layers
(
self
,
model
):
"""
...
...
@@ -272,12 +292,14 @@ class ImperativePTQ(object):
Returns:
None
"""
assert
isinstance
(
model
,
paddle
.
nn
.
Layer
),
\
"The input model must be the instance of paddle.nn.Layer."
assert
isinstance
(
model
,
paddle
.
nn
.
Layer
),
"The input model must be the instance of paddle.nn.Layer."
for
name
,
sub_layer
in
model
.
named_sublayers
():
if
self
.
_is_quant_layer
(
sub_layer
)
\
and
PTQRegistry
.
is_simulated_quant_layer
(
sub_layer
):
if
self
.
_is_quant_layer
(
sub_layer
)
and
PTQRegistry
.
is_simulated_quant_layer
(
sub_layer
):
quant_config
=
sub_layer
.
_quant_config
assert
quant_config
.
enable_in_act_quantizer
==
True
...
...
@@ -303,36 +325,44 @@ class ImperativePTQ(object):
"activation_bits"
:
in_act_quantizer
.
quant_bits
,
}
quant_layer
=
quant_layers
.
__dict__
[
quant_layer_name
](
sub_layer
,
**
kwargs
)
quant_layer
=
quant_layers
.
__dict__
[
quant_layer_name
](
sub_layer
,
**
kwargs
)
# save the input thresholds
assert
hasattr
(
quant_layer
,
"_fake_quant_input"
)
assert
hasattr
(
quant_layer
.
_fake_quant_input
,
"_scale"
)
assert
len
(
in_act_quantizer
.
thresholds
)
==
1
input_threshold
=
np
.
array
([
in_act_quantizer
.
thresholds
[
0
]],
dtype
=
np
.
float32
)
quant_layer
.
_fake_quant_input
.
_scale
.
set_value
(
input_threshold
)
if
len
(
in_act_quantizer
.
thresholds
)
==
1
:
input_threshold
=
np
.
array
(
[
in_act_quantizer
.
thresholds
[
0
]],
dtype
=
np
.
float32
)
quant_layer
.
_fake_quant_input
.
_scale
.
set_value
(
input_threshold
)
assert
hasattr
(
quant_layer
,
"_fake_quant_weight"
)
assert
hasattr
(
quant_layer
.
_fake_quant_weight
,
"_scale"
)
assert
len
(
wt_quantizer
.
thresholds
)
==
1
weight_threshold
=
wt_quantizer
.
thresholds
[
0
]
if
isinstance
(
weight_threshold
,
list
):
weight_threshold
=
np
.
array
(
weight_threshold
,
dtype
=
np
.
float32
)
weight_threshold
=
np
.
array
(
weight_threshold
,
dtype
=
np
.
float32
)
else
:
weight_threshold
=
np
.
array
([
weight_threshold
],
dtype
=
np
.
float32
)
weight_threshold
=
np
.
array
(
[
weight_threshold
],
dtype
=
np
.
float32
)
quant_layer
.
_fake_quant_weight
.
_scale
.
set_value
(
weight_threshold
)
weight_threshold
)
# save the output thresholds
self
.
_save_output_thresholds
(
quant_layer
,
quant_config
)
# replace the layer
parent_layer
,
sub_name
=
\
utils
.
find_parent_layer_and_sub_name
(
model
,
name
)
parent_layer
,
sub_name
=
utils
.
find_parent_layer_and_sub_name
(
model
,
name
)
setattr
(
parent_layer
,
sub_name
,
quant_layer
)
def
_gather_input_thresholds
(
self
,
program
,
scope
):
...
...
@@ -351,30 +381,37 @@ class ImperativePTQ(object):
if
previous_op
is
None
:
continue
if
"quantize_dequantize"
in
previous_op
.
type
or
\
previous_op
.
type
==
"moving_average_abs_max_scale"
:
if
(
"quantize_dequantize"
in
previous_op
.
type
or
previous_op
.
type
==
"moving_average_abs_max_scale"
):
attr_name
=
previous_op
.
output
(
'OutScale'
)[
0
]
in_threshold
=
utils
.
load_variable_data
(
scope
,
attr_name
)
in_threshold
=
utils
.
fp_numpy_to_naive
(
in_threshold
)
argname
,
index
=
utils
.
_get_input_name_index
(
op
,
in_var_name
)
op
.
_set_attr
(
argname
+
str
(
index
)
+
"_threshold"
,
in_threshold
)
op
,
in_var_name
)
op
.
_set_attr
(
argname
+
str
(
index
)
+
"_threshold"
,
in_threshold
)
op
.
_set_attr
(
"with_quant_attr"
,
True
)
else
:
for
out_var_name
in
utils
.
_get_op_output_var_names
(
previous_op
):
previous_op
):
if
out_var_name
!=
in_var_name
:
continue
argname
,
index
=
utils
.
_get_output_name_index
(
previous_op
,
out_var_name
)
previous_op
,
out_var_name
)
attr_name
=
argname
+
str
(
index
)
+
"_threshold"
if
not
previous_op
.
has_attr
(
attr_name
):
continue
threshold
=
previous_op
.
attr
(
attr_name
)
argname
,
index
=
utils
.
_get_input_name_index
(
op
,
in_var_name
)
op
,
in_var_name
)
attr_name
=
argname
+
str
(
index
)
+
"_threshold"
op
.
_set_attr
(
attr_name
,
threshold
)
op
.
_set_attr
(
"with_quant_attr"
,
True
)
...
...
@@ -390,8 +427,11 @@ class ImperativePTQ(object):
"""
def
_helper
(
op
,
next_op
,
old_attr_name
,
new_attr_name
):
if
op
.
has_attr
(
old_attr_name
)
and
next_op
.
has_attr
(
old_attr_name
)
\
and
op
.
attr
(
old_attr_name
)
==
next_op
.
attr
(
old_attr_name
):
if
(
op
.
has_attr
(
old_attr_name
)
and
next_op
.
has_attr
(
old_attr_name
)
and
op
.
attr
(
old_attr_name
)
==
next_op
.
attr
(
old_attr_name
)
):
threshold
=
op
.
attr
(
old_attr_name
)
op
.
_remove_attr
(
old_attr_name
)
next_op
.
_remove_attr
(
old_attr_name
)
...
...
@@ -417,8 +457,8 @@ class ImperativePTQ(object):
old_attr_name
=
argname
+
str
(
index
)
+
"_threshold"
argname
,
index
=
utils
.
_get_output_name_index
(
next_op
,
next_op
.
output
(
"Out"
)[
0
]
)
next_op
,
next_op
.
output
(
"Out"
)[
0
]
)
new_attr_name
=
argname
+
str
(
index
)
+
"_threshold"
_helper
(
op
,
next_op
,
old_attr_name
,
new_attr_name
)
...
...
python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py
浏览文件 @
12b9b03e
...
...
@@ -41,6 +41,7 @@ PTQ_LAYERS_INFO = [
LayerInfo
(
paddle
.
nn
.
ReLU
,
[
'X'
],
[],
[
'Out'
]),
LayerInfo
(
paddle
.
nn
.
ReLU6
,
[
'X'
],
[],
[
'Out'
]),
LayerInfo
(
paddle
.
nn
.
Hardswish
,
[
'X'
],
[],
[
'Out'
]),
LayerInfo
(
paddle
.
nn
.
Swish
,
[
'X'
],
[],
[
'Out'
]),
LayerInfo
(
paddle
.
nn
.
Sigmoid
,
[
'X'
],
[],
[
'Out'
]),
LayerInfo
(
paddle
.
nn
.
Softmax
,
[
'X'
],
[],
[
'Out'
]),
LayerInfo
(
paddle
.
nn
.
Tanh
,
[
'X'
],
[],
[
'Out'
]),
...
...
@@ -48,10 +49,15 @@ PTQ_LAYERS_INFO = [
]
QUANT_LAYERS_INFO
=
[
LayerInfo
(
paddle
.
nn
.
quant
.
quant_layers
.
QuantizedConv2D
,
[
'Input'
],
[
'Filter'
],
[
'Output'
]),
LayerInfo
(
paddle
.
nn
.
quant
.
quant_layers
.
QuantizedLinear
,
[
'X'
],
[
'Y'
],
[
'Out'
]),
LayerInfo
(
paddle
.
nn
.
quant
.
quant_layers
.
QuantizedConv2D
,
[
'Input'
],
[
'Filter'
],
[
'Output'
],
),
LayerInfo
(
paddle
.
nn
.
quant
.
quant_layers
.
QuantizedLinear
,
[
'X'
],
[
'Y'
],
[
'Out'
]
),
]
SIMULATED_LAYERS
=
[
paddle
.
nn
.
Conv2D
,
paddle
.
nn
.
Linear
]
...
...
@@ -61,6 +67,7 @@ class PTQRegistry(object):
"""
Register the supported layers for PTQ and provide layers info.
"""
supported_layers_map
=
{}
registered_layers_map
=
{}
is_inited
=
False
...
...
@@ -89,8 +96,9 @@ class PTQRegistry(object):
flag(bool): Whther the layer is supported.
"""
cls
.
_init
()
return
layer
in
cls
.
supported_layers_map
or
\
isinstance
(
layer
,
tuple
(
cls
.
supported_layers_map
.
keys
()))
return
layer
in
cls
.
supported_layers_map
or
isinstance
(
layer
,
tuple
(
cls
.
supported_layers_map
.
keys
())
)
@
classmethod
def
is_registered_layer
(
cls
,
layer
):
...
...
@@ -102,8 +110,9 @@ class PTQRegistry(object):
flag(bool): Wether the layer is register layer_info.
"""
cls
.
_init
()
return
layer
in
cls
.
registered_layers_map
or
\
isinstance
(
layer
,
tuple
(
cls
.
registered_layers_map
.
keys
()))
return
layer
in
cls
.
registered_layers_map
or
isinstance
(
layer
,
tuple
(
cls
.
registered_layers_map
.
keys
())
)
@
classmethod
def
is_simulated_quant_layer
(
cls
,
layer
):
...
...
@@ -114,8 +123,9 @@ class PTQRegistry(object):
Returns:
flag(bool): Whther the layer is supported.
"""
return
layer
in
SIMULATED_LAYERS
or
\
isinstance
(
layer
,
tuple
(
SIMULATED_LAYERS
))
return
layer
in
SIMULATED_LAYERS
or
isinstance
(
layer
,
tuple
(
SIMULATED_LAYERS
)
)
@
classmethod
def
layer_info
(
cls
,
layer
):
...
...
@@ -126,8 +136,9 @@ class PTQRegistry(object):
Returns:
layer_info(LayerInfo): The layer info of the input layer.
"""
assert
cls
.
is_registered_layer
(
layer
),
\
"The input layer is not register."
assert
cls
.
is_registered_layer
(
layer
),
"The input layer is not register."
for
layer_key
,
layer_info
in
cls
.
registered_layers_map
.
items
():
if
layer
==
layer_key
or
isinstance
(
layer
,
layer_key
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录