Unverified commit 3d5a27f0
Authored by Guanghua Yu on Mar 28, 2022; committed via GitHub on Mar 28, 2022.
add adaround post-quant method (#38460)

* add adaround post-quant method

Parent: 56dc8c79

Showing 8 changed files with 709 additions and 146 deletions (+709, -146)
python/paddle/fluid/contrib/slim/quantization/adaround.py                              +309  -0
python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py            +64   -45
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py                     +24   -43
python/paddle/fluid/contrib/slim/quantization/utils.py                                 +95   -0
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py   +35   -9
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py        +58   -27
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py  +120  -19
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py     +4    -3
python/paddle/fluid/contrib/slim/quantization/adaround.py (new file, mode 100644)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import time
import sys
import logging

import paddle.fluid as fluid

from ....log_helper import get_logger
from .utils import load_variable_data, set_variable_data, stable_sigmoid, \
    quant_tensor, dequant_tensor, _channelwise_quant_axis1_ops, \
    calculate_quant_cos_error

_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')

GAMMA = -0.1
ZETA = 1.1
def compute_soft_rounding(alpha_v):
    return fluid.layers.clip(
        fluid.layers.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA, min=0, max=1)


def compute_soft_rounding_np(alpha_v):
    return np.clip(
        stable_sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA, a_min=0, a_max=1)
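
Editor's note: a minimal numpy restatement (not part of this commit) of the rectified sigmoid above. The (ZETA, GAMMA) stretch is what lets h(alpha) saturate at exactly 0 and 1 for finite alpha, so the learned rounding can commit to floor or ceil:

import numpy as np

GAMMA, ZETA = -0.1, 1.1

def h(alpha):
    # rectified sigmoid: clip(sigmoid(alpha) * (ZETA - GAMMA) + GAMMA, 0, 1)
    sig = 1.0 / (1.0 + np.exp(-alpha))
    return np.clip(sig * (ZETA - GAMMA) + GAMMA, 0, 1)

print(h(np.array([-10.0, -2.0, 0.0, 2.0, 10.0])))
# [0.  0.043... 0.5  0.956... 1.] -- the endpoints reach exactly 0 and 1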
class AdaRoundLoss(object):
    def __init__(self, reg_param=0.01, default_beta_range=(20, 2)):
        self.default_reg_param = reg_param
        self.default_beta_range = default_beta_range

    def compute_recon_loss(self, ada_quantized_output, orig_output):
        square_cost = fluid.layers.square_error_cost(ada_quantized_output,
                                                     orig_output)
        recon_loss = fluid.layers.reduce_mean(
            fluid.layers.reduce_sum(square_cost, dim=-1))
        return recon_loss

    def compute_round_loss(self, alpha_v, warm_start, beta):
        def round_loss_fn():
            # compute the rectified sigmoid of parameter 'alpha', which maps it
            # between zero and one
            h_v = compute_soft_rounding(alpha_v)

            # calculate the regularization term, which encourages the parameter
            # to converge to exactly zeros and ones at the end of optimization
            reg_term = fluid.layers.reduce_sum(
                -fluid.layers.pow(fluid.layers.abs(2 * h_v - 1), factor=beta) +
                1)

            # calculate the rounding loss
            round_loss = self.default_reg_param * reg_term
            return round_loss

        # during the warm-start period the rounding loss is held at zero
        round_loss = fluid.layers.cond(
            warm_start, lambda: fluid.layers.fill_constant(
                shape=[1], dtype='float32', value=0.0), round_loss_fn)

        return round_loss

    def compute_beta(self, max_iter, cur_iter, warm_start):
        # start and stop beta for annealing of the rounding loss
        # (start_beta, end_beta)
        start_beta, end_beta = self.default_beta_range

        # iteration at the end of the warm-start period; the fraction is given
        # by `warm_start` (0.1 of max iterations by default in AdaRound)
        warm_start_end_iter = warm_start * max_iter

        # relative position of the current iteration within the annealing span
        rel_iter = (cur_iter - warm_start_end_iter) / (
            max_iter - warm_start_end_iter)
        beta = end_beta + 0.5 * (start_beta - end_beta) * (
            1 + np.cos(rel_iter * np.pi))

        return beta
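
Editor's note: the annealing schedule in compute_beta can be sanity-checked standalone. This sketch (not part of the commit) reproduces the cosine decay from start_beta=20 at the end of warm start down to end_beta=2:

import numpy as np

def compute_beta(max_iter, cur_iter, warm_start, start_beta=20, end_beta=2):
    warm_start_end_iter = warm_start * max_iter
    rel_iter = (cur_iter - warm_start_end_iter) / (max_iter - warm_start_end_iter)
    return end_beta + 0.5 * (start_beta - end_beta) * (1 + np.cos(rel_iter * np.pi))

for it in (100, 400, 700, 1000):
    print(it, round(compute_beta(1000, it, 0.1), 2))
# 100 -> 20.0 (end of warm start), 400 -> 15.5, 700 -> 6.5, 1000 -> 2.0

A large beta keeps |2h-1|^beta flat except near 0/1, so early on the regularizer barely penalizes undecided weights; as beta anneals toward 2 it pushes every h(alpha) to commit to 0 or 1.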
class AdaRound(object):
    def __init__(self,
                 scale,
                 weight_tensor,
                 scope=None,
                 weight_var_name=None,
                 weight_op_type=None,
                 is_train=True,
                 num_iterations=1000):
        self.is_train = is_train
        self.num_iterations = num_iterations
        self.warm_start = 0.1
        self.weight_bits = 8
        self.offset = 0.  # zero-point offset
        self.adaround_loss = AdaRoundLoss()
        self.ori_weight_tensor = weight_tensor
        self.scale = scale
        self.scope = scope
        self.quant_axis = 0
        if weight_op_type in _channelwise_quant_axis1_ops:
            self.quant_axis = 1
        self.weight_var_name = weight_var_name
        self.alpha_name = weight_var_name + ".alpha"
        self.initialize_alpha(weight_tensor.copy(), scale, weight_var_name)

    def initialize_alpha(self, tensor, scale, var_name):
        """
        Initializes the alpha parameter, same shape as the weight tensor
        """
        tensor_scale = quant_tensor(tensor, scale, quant_axis=self.quant_axis)
        tensor_floor = np.floor(tensor_scale)
        tensor = tensor_scale - tensor_floor
        alpha = -np.log((ZETA - GAMMA) / (tensor - GAMMA) - 1)
        self.alpha_v = fluid.layers.create_parameter(
            shape=alpha.shape,
            dtype="float32",
            name=var_name + ".alpha",
            default_initializer=fluid.initializer.NumpyArrayInitializer(alpha))

    def _calculate_output_with_adarounded_weights(self, program, place, exe,
                                                  data, fp32_fetch_list,
                                                  weight_tensor_dequant):
        set_variable_data(self.scope, place, self.weight_var_name,
                          weight_tensor_dequant)

        adaround_out_tensor = exe.run(program=program,
                                      feed=data,
                                      fetch_list=[fp32_fetch_list],
                                      return_numpy=True,
                                      scope=self.scope)
        return adaround_out_tensor

    def _calculate_quant_weight(self):
        np_alpha = load_variable_data(self.scope, self.alpha_name)
        h_alpha = compute_soft_rounding_np(np_alpha)

        # Scale the tensor
        tensor_scale = quant_tensor(
            self.ori_weight_tensor.copy(),
            self.scale,
            quant_axis=self.quant_axis)

        weight_tensor = np.floor(tensor_scale)

        # Adaround the tensor: floor plus the learned 0/1 offset
        weight_tensor_quant = np.add(weight_tensor, h_alpha)
        return weight_tensor_quant

    def _calculate_adarounded_weights(self):
        weight_tensor_quant = self._calculate_quant_weight()

        # Dequantize the tensor
        weight_tensor_dequant = dequant_tensor(
            weight_tensor_quant + self.offset,
            self.scale,
            quant_axis=self.quant_axis)
        return weight_tensor_dequant

    def update_final_weights(self):
        weight_tensor_quant = self._calculate_quant_weight()
        return weight_tensor_quant

    def get_loss(self, beta, warm_start, adaround_out_tensor, orig_out_tensor):
        round_loss = self.adaround_loss.compute_round_loss(self.alpha_v,
                                                           warm_start, beta)
        recon_loss = self.adaround_loss.compute_recon_loss(adaround_out_tensor,
                                                           orig_out_tensor)
        loss = round_loss + recon_loss
        losses = {
            'loss': loss,
            'round_loss': round_loss,
            'recon_loss': recon_loss
        }
        return losses

    def update_beta_warm(self, cur_iteration):
        warm_start = cur_iteration < self.num_iterations * self.warm_start
        beta = self.adaround_loss.compute_beta(self.num_iterations,
                                               cur_iteration, self.warm_start)
        return beta, warm_start
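
Editor's note: initialize_alpha above is the exact inverse of the rectified sigmoid, so training starts from the fp32 rounding residues rather than from noise. A standalone numpy check (not part of the commit):

import numpy as np

GAMMA, ZETA = -0.1, 1.1
rest = np.array([0.12, 0.5, 0.88])  # fractional residues w/s - floor(w/s)
alpha = -np.log((ZETA - GAMMA) / (rest - GAMMA) - 1)  # as in initialize_alpha
sig = 1.0 / (1.0 + np.exp(-alpha))
h = np.clip(sig * (ZETA - GAMMA) + GAMMA, 0, 1)       # compute_soft_rounding_np
print(np.allclose(h, rest))  # True: h(alpha_init) reproduces the residues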
def run_adaround(data_loader,
                 fp32_program,
                 fetch_list,
                 exe,
                 scope,
                 place,
                 quantized_op_pairs,
                 weight_op_pairs,
                 scale_dict,
                 num_iterations=1000,
                 lr=0.001,
                 fast_mode=True):
    fetch_op_name = fetch_list[0].name
    final_weight_tensor_quant_dict = {}
    for weight_var_name, quant_op_out_name in quantized_op_pairs.items():
        _logger.info('Start adaround op: {}'.format(weight_var_name))
        weight_op_type = weight_op_pairs[weight_var_name]
        # get the scale and weight tensor
        weight_var_tensor = load_variable_data(scope, weight_var_name)
        scale = scale_dict[weight_var_name]
        fp32_fetch_list = None
        for _op in fp32_program.global_block().ops:
            if _op.type == "fetch":
                _op._rename_input(fetch_op_name, quant_op_out_name)
                fp32_fetch_list = fp32_program.global_block().var(
                    quant_op_out_name)
                fetch_op_name = quant_op_out_name

        # build the adaround program
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 1
        startup_program = fluid.Program()
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                # initialize adaround
                adaround = AdaRound(
                    scale,
                    weight_var_tensor,
                    scope=scope,
                    weight_var_name=weight_var_name,
                    weight_op_type=weight_op_type,
                    num_iterations=num_iterations)
                orig_out_tensor = fluid.data(
                    name='orig_out_tensor',
                    shape=fp32_fetch_list.shape,
                    dtype='float32')
                adaround_out_tensor = fluid.data(
                    name='adaround_out_tensor',
                    shape=fp32_fetch_list.shape,
                    dtype='float32')
                beta_tensor = fluid.data(
                    name='beta', shape=[1], dtype='float32')
                warm_start_tensor = fluid.data(
                    name='warm_start', shape=[1], dtype='bool')

                train_fetches_loss = adaround.get_loss(
                    beta_tensor, warm_start_tensor, adaround_out_tensor,
                    orig_out_tensor)
                optimizer = fluid.optimizer.Adam(learning_rate=lr)
                loss = train_fetches_loss['loss']
                optimizer.minimize(loss)
        exe.run(startup_program)

        start_time = time.time()
        prev_start_time = start_time
        for i, data in enumerate(data_loader()):
            prev_start_time = start_time
            start_time = time.time()
            # run the fp32 model
            np_orig_out_tensor = exe.run(program=fp32_program,
                                         feed=data,
                                         fetch_list=[fp32_fetch_list],
                                         return_numpy=True,
                                         scope=scope)

            adaround_weight_tensor_dequant = \
                adaround._calculate_adarounded_weights()
            np_adaround_out_tensor = \
                adaround._calculate_output_with_adarounded_weights(
                    fp32_program, place, exe, data, fp32_fetch_list,
                    adaround_weight_tensor_dequant)

            # If the cosine distance between the two tensors is small,
            # skip training
            cos_error = calculate_quant_cos_error(np_orig_out_tensor[0],
                                                  np_adaround_out_tensor[0])
            if fast_mode and cos_error > 0.99:
                _logger.info("The cosine error is small, skip training.")
                break
            beta, warm_start = adaround.update_beta_warm(i)
            feed_dict = {
                'orig_out_tensor': np_orig_out_tensor[0],
                'adaround_out_tensor': np_adaround_out_tensor[0],
                'beta': beta,
                'warm_start': warm_start
            }
            out = exe.run(
                train_program,
                feed=feed_dict,
                fetch_list=[v.name for v in train_fetches_loss.values()],
                return_numpy=True)
            _logger.info(
                "Iter {:d}, lr {:.5f}, loss {:.5f}, loss_round {:.5f}, "
                "loss_recon {:.5f}, time {:.5f}s".format(
                    i, lr,
                    np.mean(out[0]),
                    np.mean(out[1]),
                    np.mean(out[2]), start_time - prev_start_time))
            sys.stdout.flush()
            if i == num_iterations:
                break
        final_weight_tensor_quant_dict[
            weight_var_name] = adaround.update_final_weights()
        del adaround

    # update the adarounded calibrated weights
    for weight_var_name in quantized_op_pairs.keys():
        set_variable_data(scope, place, weight_var_name,
                          final_weight_tensor_quant_dict[weight_var_name])
python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py (modified)
@@ -35,6 +35,8 @@ from .quantization_pass import _get_output_name_index
 from .quantization_pass import _get_input_name_index
 from .quantization_pass import _channelwise_quant_axis1_ops
 from .cal_kl_threshold import cal_kl_threshold
+from .adaround import run_adaround
+from .utils import load_variable_data, set_variable_data

 __all__ = ['PostTrainingQuantization', 'WeightQuantization']

@@ -42,28 +44,6 @@ _logger = get_logger(
     __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')


-def _load_variable_data(scope, var_name):
-    '''
-    Load variable value from scope
-    '''
-    var_node = scope.find_var(var_name)
-    assert var_node is not None, \
-        "Cannot find " + var_name + " in scope."
-    return np.array(var_node.get_tensor())
-
-
-def _set_variable_data(scope, place, var_name, np_value):
-    '''
-    Set the value of var node by name, if the node exits,
-    '''
-    assert isinstance(np_value, np.ndarray), \
-        'The type of value should be numpy array.'
-    var_node = scope.find_var(var_name)
-    if var_node != None:
-        tensor = var_node.get_tensor()
-        tensor.set(np_value, place)
-
-
 def _all_persistable_var_names(program):
     persistable_var_names = []
     for var in program.list_vars():

@@ -143,6 +123,8 @@ class PostTrainingQuantization(object):
                  algo="KL",
                  hist_percent=0.99999,
                  quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
+                 round_type='round',
+                 learning_rate=0.001,
                  is_full_quantize=False,
                  bias_correction=False,
                  activation_bits=8,

@@ -198,6 +180,10 @@ class PostTrainingQuantization(object):
             quantizable_op_type(list[str], optional): List the type of ops
                 that will be quantized. Default is ["conv2d", "depthwise_conv2d",
                 "mul"].
+            round_type(str, optional): The method used to convert quantized weight
+                values from float to int. Currently supports the ['round', 'adaround']
+                methods. Default is `round`, which rounds to the nearest whole number.
+            learning_rate(float, optional): The learning rate of the adaround method.
             is_full_quantized(bool, optional): If set is_full_quantized as True,
                 apply quantization to all supported quantizable op type. If set
                 is_full_quantized as False, only apply quantization to the op type

@@ -274,6 +260,9 @@ class PostTrainingQuantization(object):
         self._support_algo_type = [
             'KL', 'hist', 'avg', 'mse', 'emd', 'abs_max', 'min_max'
         ]
+        assert round_type in ['adaround', 'round']
+        self._round_type = round_type
+        self._learning_rate = learning_rate
         self._dynamic_quantize_op_type = ['lstm']
         self._support_quantize_op_type = \
             list(set(QuantizationTransformPass._supported_quantizable_op_type +

@@ -401,6 +390,10 @@ class PostTrainingQuantization(object):
             if self._batch_nums and batch_id >= self._batch_nums:
                 break
         _logger.info("Finish sampling stage, all batch: " + str(batch_id))
+
+        if self._round_type == 'adaround':
+            self._adaround_apply()
+
         self._reset_activation_persistable()
         if self._algo == 'avg':
             for var_name in self._quantized_act_var_name:

@@ -437,6 +430,24 @@ class PostTrainingQuantization(object):
         return self._program

+    def _adaround_apply(self):
+        if self._algo in ["KL", "hist"]:
+            scale_dict = self._quantized_var_threshold
+        else:
+            scale_dict = self._quantized_threshold
+        run_adaround(
+            self._data_loader,
+            self._program,
+            self._fetch_list,
+            self._executor,
+            self._scope,
+            self._place,
+            self._quantized_op_pairs,
+            self._weight_op_pairs,
+            scale_dict,
+            num_iterations=self._batch_nums,
+            lr=self._learning_rate)
+
     def save_quantized_model(self,
                              save_model_path,
                              model_filename=None,

@@ -519,6 +530,7 @@ class PostTrainingQuantization(object):
         '''
         # TODO(juncaipeng), consider the name_scope of skip_quant
         _logger.info("Collect quantized variable names ...")
+        self._quantized_op_pairs = {}

         def collect_var_name(var_name_list, persistable_var_names, op_type):
             for var_name in var_name_list:

@@ -544,6 +556,12 @@ class PostTrainingQuantization(object):
                     collect_var_name(
                         _get_op_output_var_names(op), persistable_var_names,
                         op_type)
+                    # collect the quantized op's output var names
+                    for out_var_name in _get_op_output_var_names(op):
+                        for in_var_name in _get_op_input_var_names(op):
+                            if in_var_name in persistable_var_names:
+                                self._quantized_op_pairs[
+                                    in_var_name] = out_var_name
                 # For other op, only sample output scale
                 elif op_type in self._out_scale_op_list:
                     collect_var_name(

@@ -590,7 +608,7 @@ class PostTrainingQuantization(object):
     def _sample_mse(self):
         if self._quantized_threshold == {}:
             for var_name in self._quantized_weight_var_name:
-                var_tensor = _load_variable_data(self._scope, var_name)
+                var_tensor = load_variable_data(self._scope, var_name)
                 if self._weight_quantize_type == "abs_max":
                     abs_max_value = float(np.max(np.abs(var_tensor)))
                 elif self._weight_quantize_type == "channel_wise_abs_max":

@@ -607,7 +625,7 @@ class PostTrainingQuantization(object):
                 self._quantized_threshold[var_name] = abs_max_value
         _logger.info("MSE searching stage ...")
         for var_name in self._quantized_act_var_name:
-            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor = load_variable_data(self._scope, var_name)
             var_tensor = var_tensor.flatten()
            abs_max_value = float(np.max(np.abs(var_tensor)))
             abs_max_value = 1e-8 if abs_max_value == 0.0 else abs_max_value

@@ -629,7 +647,7 @@ class PostTrainingQuantization(object):
     def _sample_emd(self):
         if self._quantized_threshold == {}:
             for var_name in self._quantized_weight_var_name:
-                var_tensor = _load_variable_data(self._scope, var_name)
+                var_tensor = load_variable_data(self._scope, var_name)
                 if self._weight_quantize_type == "abs_max":
                     abs_max_value = float(np.max(np.abs(var_tensor)))
                 elif self._weight_quantize_type == "channel_wise_abs_max":

@@ -646,7 +664,7 @@ class PostTrainingQuantization(object):
                 self._quantized_threshold[var_name] = abs_max_value
         _logger.info("EMD searching stage ...")
         for var_name in self._quantized_act_var_name:
-            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor = load_variable_data(self._scope, var_name)
             var_tensor = var_tensor.flatten()
             abs_max_value = float(np.max(np.abs(var_tensor)))
             abs_max_value = 1e-8 if abs_max_value == 0.0 else abs_max_value

@@ -670,7 +688,7 @@ class PostTrainingQuantization(object):
     def _sample_avg(self):
         if self._quantized_threshold == {}:
             for var_name in self._quantized_weight_var_name:
-                var_tensor = _load_variable_data(self._scope, var_name)
+                var_tensor = load_variable_data(self._scope, var_name)
                 if self._weight_quantize_type == "abs_max":
                     abs_max_value = float(np.max(np.abs(var_tensor)))
                 elif self._weight_quantize_type == "channel_wise_abs_max":

@@ -687,7 +705,7 @@ class PostTrainingQuantization(object):
                 self._quantized_threshold[var_name] = abs_max_value
         for var_name in self._quantized_act_var_name:
-            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor = load_variable_data(self._scope, var_name)
             abs_max_value = float(np.max(np.abs(var_tensor)))
             if (var_name not in self._quantized_var_avg):
                 self._quantized_var_avg[var_name] = []

@@ -699,7 +717,7 @@ class PostTrainingQuantization(object):
     def _sample_abs_max(self):
         if self._quantized_threshold == {}:
             for var_name in self._quantized_weight_var_name:
-                var_tensor = _load_variable_data(self._scope, var_name)
+                var_tensor = load_variable_data(self._scope, var_name)
                 if self._weight_quantize_type == "abs_max":
                     abs_max_value = float(np.max(np.abs(var_tensor)))
                 elif self._weight_quantize_type == "channel_wise_abs_max":

@@ -716,7 +734,7 @@ class PostTrainingQuantization(object):
                 self._quantized_threshold[var_name] = abs_max_value
         for var_name in self._quantized_act_var_name:
-            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor = load_variable_data(self._scope, var_name)
             abs_max_value = float(np.max(np.abs(var_tensor)))
             if (var_name not in self._quantized_threshold) or \
                     (abs_max_value > self._quantized_threshold[var_name]):

@@ -725,7 +743,7 @@ class PostTrainingQuantization(object):
     def _sample_min_max(self):
         if self._quantized_var_min == {} and self._quantized_var_max == {}:
             for var_name in self._quantized_weight_var_name:
-                var_tensor = _load_variable_data(self._scope, var_name)
+                var_tensor = load_variable_data(self._scope, var_name)
                 if self._weight_quantize_type == "abs_max":
                     min_value = float(np.min(var_tensor))
                     max_value = float(np.max(var_tensor))

@@ -745,7 +763,7 @@ class PostTrainingQuantization(object):
                 self._quantized_var_max[var_name] = max_value
         for var_name in self._quantized_act_var_name:
-            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor = load_variable_data(self._scope, var_name)
             min_value = float(np.min(var_tensor))
             max_value = float(np.max(var_tensor))
             if (var_name not in self._quantized_var_min) or \

@@ -757,7 +775,7 @@ class PostTrainingQuantization(object):
     def _sample_histogram(self):
         for var_name in self._quantized_act_var_name:
-            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor = load_variable_data(self._scope, var_name)
             var_tensor_abs = np.abs(var_tensor)
             bins = self._sampling_act_histogram[var_name][1]
             hist, _ = np.histogram(var_tensor_abs, bins=bins)

@@ -787,7 +805,7 @@ class PostTrainingQuantization(object):
         get the min and max value, and then calculate the threshold.
         '''
         for var_name in self._quantized_act_var_name:
-            var_tensor = _load_variable_data(self._scope, var_name)
+            var_tensor = load_variable_data(self._scope, var_name)
             var_tensor = np.abs(var_tensor)
             min_value = float(np.min(var_tensor))
             max_value = float(np.max(var_tensor))

@@ -821,7 +839,7 @@ class PostTrainingQuantization(object):
         # Abs_max threshold for weights
         for var_name in self._quantized_weight_var_name:
-            weight_data = _load_variable_data(self._scope, var_name)
+            weight_data = load_variable_data(self._scope, var_name)
             if self._weight_quantize_type == "abs_max":
                 weight_threshold = float(np.max(np.abs(weight_data)))
             elif self._weight_quantize_type == "channel_wise_abs_max":

@@ -896,13 +914,13 @@ class PostTrainingQuantization(object):
         else:
             scale_dict = self._quantized_threshold
         for key, val in scale_dict.items():
-            _set_variable_data(
+            set_variable_data(
                 self._scope,
                 self._place,
                 key + ".scale",
                 np.array([val], dtype=np.float32))
-            _set_variable_data(
+            set_variable_data(
                 self._scope,
                 self._place,
                 key + ".quant_dequant.scale",

@@ -915,6 +933,7 @@ class PostTrainingQuantization(object):
             place=self._place,
             bias_correction=self._bias_correction,
             weight_bits=self._weight_bits,
+            round_type=self._round_type,
             activation_bits=self._activation_bits,
             weight_quantize_type=self._weight_quantize_type,
             quantizable_op_type=major_quantizable_op_types)

@@ -961,7 +980,7 @@ class PostTrainingQuantization(object):
                               argname_index[0] + str(argname_index[1]) +
                               "_threshold", "post_hist")
-                elif self._algo in ["avg", "abs_max", "mse"]:
+                elif self._algo in ["avg", "abs_max", "mse", "emd"]:
                     save_info(op_node, out_var_name,
                               self._quantized_threshold, "out_threshold",
                               "post_" + str(self._algo))
                     save_info(

@@ -1003,7 +1022,7 @@ class PostTrainingQuantization(object):
         for op in target_ops:
             for var_name in _get_op_input_var_names(op):
                 if var_name in persistable_var_names:
-                    var_data = _load_variable_data(self._scope, var_name)
+                    var_data = load_variable_data(self._scope, var_name)
                     threshold = float(np.max(np.abs(var_data)))
                     argname, index = _get_input_name_index(op, var_name)
                     op._set_attr(argname + str(index) + "_threshold", threshold)

@@ -1249,7 +1268,7 @@ class WeightQuantization(object):
         save_weight_dtype = np.int8 if weight_bits == 8 else np.int16
         # Get quantized scale and weight data
-        weight_data = _load_variable_data(scope, var_name)
+        weight_data = load_variable_data(scope, var_name)
         if abs(threshold_rate) < 1e-10:
             threshold_value = np.max(np.abs(weight_data))
         else:

@@ -1263,11 +1282,11 @@ class WeightQuantization(object):
         # Set weight data
         if not for_test:
-            _set_variable_data(scope, place, var_name, quantized_weight_data)
+            set_variable_data(scope, place, var_name, quantized_weight_data)
         else:
             dequantized_weight_data = \
                 (quantized_weight_data * scale).astype(np.float32)
-            _set_variable_data(scope, place, var_name, dequantized_weight_data)
+            set_variable_data(scope, place, var_name, dequantized_weight_data)

         # Save info
         op._set_attr('quantization_type', 'post_weight_abs_max')

@@ -1284,7 +1303,7 @@ class WeightQuantization(object):
         save_weight_dtype = np.int8 if weight_bits == 8 else np.int16
         # Get quantized scale and weight data
-        weight_data = _load_variable_data(scope, var_name)
+        weight_data = load_variable_data(scope, var_name)
         if op.type == "mul":
             scales, quantized_weight_data = \
                 self._mul_channel_wise_quantization(weight_data,

@@ -1298,7 +1317,7 @@ class WeightQuantization(object):
         # Set weight data
         if not for_test:
-            _set_variable_data(scope, place, var_name, quantized_weight_data)
+            set_variable_data(scope, place, var_name, quantized_weight_data)
         else:
             if op.type == "mul":
                 dequantized_weight_data = \

@@ -1309,7 +1328,7 @@ class WeightQuantization(object):
             else:
                 _logger.error(op.type +
                               " is not supported by weight quantization")
-            _set_variable_data(scope, place, var_name, dequantized_weight_data)
+            set_variable_data(scope, place, var_name, dequantized_weight_data)

         # Save info
         op._set_attr('quantization_type', 'post_weight_channel_wise_abs_max')
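Editor's note: for reference, a hedged usage sketch of the new round_type option (the model path, output path, and sample generator below are placeholders, not taken from this commit):

import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization

place = fluid.CPUPlace()
exe = fluid.Executor(place)
ptq = PostTrainingQuantization(
    executor=exe,
    model_dir='/path/to/fp32_model',        # placeholder
    sample_generator=my_sample_generator,   # placeholder: yields calibration data
    batch_size=10,
    batch_nums=10,
    algo='avg',
    round_type='adaround',   # new in this commit; default is 'round'
    learning_rate=0.001,     # new in this commit; used only by adaround
    quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"])
ptq.quantize()
ptq.save_quantized_model('/path/to/int8_model')  # placeholder

Note that quantize() runs the usual sampling stage first and, only when round_type is 'adaround', then calls _adaround_apply() to learn the per-weight rounding before the freeze pass writes the weights back.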
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py (modified)
@@ -26,6 +26,7 @@ from ....data import data
 from ....layers import mean
 from ....executor import scope_guard
 from ....framework import _get_paddle_place
+from .utils import _channelwise_quant_axis1_ops, quant_tensor

 __all__ = [
     'QuantizationTransformPass', 'QuantizationFreezePass', 'ConvertToInt8Pass',

@@ -233,10 +234,6 @@ _op_real_in_out_name = {
 _conv_ops = ['conv2d', 'depthwise_conv2d', 'conv2d_transpose']

-_channelwise_quant_axis1_ops = [
-    'conv2d_transpose', 'mul', 'matmul', 'matmul_v2'
-]
-

 def _get_op_input_var_names(op):
     """

@@ -1206,6 +1203,7 @@ class QuantizationFreezePass(object):
                  bias_correction=False,
                  weight_bits=8,
                  activation_bits=8,
+                 round_type='round',
                  weight_quantize_type='abs_max',
                  quantizable_op_type=None):
         """

@@ -1223,6 +1221,9 @@ class QuantizationFreezePass(object):
                 https://arxiv.org/abs/1810.05723.
             weight_bits(int): quantization bit number for weights.
             activation_bits(int): quantization bit number for activation.
+            round_type(str, optional): The method used to convert quantized weight
+                values from float to int. Currently supports the ['round', 'adaround']
+                methods. Default is `round`, which rounds to the nearest whole number.
             weight_quantize_type(str): quantization type for weights, support 'abs_max' and
                 'channel_wise_abs_max'. The 'range_abs_max' usually is not used for weight,
                 since weights are fixed once the model is well trained.

@@ -1238,6 +1239,7 @@ class QuantizationFreezePass(object):
         self._place = _get_paddle_place(place)
         self._weight_bits = weight_bits
         self._activation_bits = activation_bits
+        self._round_type = round_type
         self._weight_quantize_type = weight_quantize_type
         self._fake_quant_op_names = _fake_quant_op_list
         self._fake_dequant_op_names = _fake_dequant_op_list

@@ -1284,18 +1286,22 @@ class QuantizationFreezePass(object):
                 self._quant_var_scale_map[input_arg_name] = scale_v
                 # Quantize weight and restore
                 param_v = self._load_var(input_arg_name)
-                if isinstance(scale_v, list) and \
-                    any(_check_grandchild_op_node(op_node, op)
-                        for op in _channelwise_quant_axis1_ops):
-                    quant_axis = 1
-                else:
-                    quant_axis = 0
-                quantized_param_v = self._quant(param_v.copy(), scale_v,
-                                                self._weight_bits, quant_axis)
-                if self._bias_correction == True:
-                    quantized_param_v = self._bias_correction_w(
-                        param_v, quantized_param_v, scale_v, quant_axis)
-                self._restore_var(input_arg_name, quantized_param_v)
+                if self._round_type == 'round':
+                    if any(
+                            _check_grandchild_op_node(op_node, op)
+                            for op in _channelwise_quant_axis1_ops):
+                        quant_axis = 1
+                    else:
+                        quant_axis = 0
+                    quantized_param_v = quant_tensor(param_v.copy(), scale_v,
+                                                     quant_axis,
+                                                     self._weight_bits)
+                    quantized_param_v = np.round(quantized_param_v)
+                    if self._bias_correction == True:
+                        quantized_param_v = self._bias_correction_w(
+                            param_v, quantized_param_v, scale_v, quant_axis)
+                        quantized_param_v = np.round(quantized_param_v)
+                    self._restore_var(input_arg_name, quantized_param_v)
                 self._remove_fake_quant_and_dequant_op(graph, op_node)

         # Remove all fake dequant op

@@ -1513,31 +1519,6 @@ class QuantizationFreezePass(object):
         return isinstance(v, float) or isinstance(v, np.float32) \
             or isinstance(v, np.float64)

-    def _quant(self, x, scale, num_bits, quant_axis):
-        assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
-        bnt = (1 << (num_bits - 1)) - 1
-
-        def _clip(x, scale):
-            x[x > scale] = scale
-            x[x < -scale] = -scale
-            return x
-
-        if isinstance(scale, list):
-            for i, s in enumerate(scale):
-                if s == 0.0:
-                    s = 1e-8
-                if quant_axis == 0:
-                    x[i] = _clip(x[i], s)
-                    x[i] = np.round(x[i] / s * bnt)
-                else:
-                    x[:, i] = _clip(x[:, i], s)
-                    x[:, i] = np.round(x[:, i] / s * bnt)
-        else:
-            scale = 1e-8 if scale == 0.0 else scale
-            x = _clip(x, scale)
-            x = np.round(x / scale * bnt)
-        return x
-
     def _bias_correction_w(self, x, x_quant, scale_v, quant_axis):
         '''
         Bias correction for weight

@@ -1574,8 +1555,8 @@ class QuantizationFreezePass(object):
             mean_bias = np.resize(mean_bias, x.shape)
             x_dequant = (mean_bias + x_dequant) * std_bias
-        quantized_param_v = self._quant(x_dequant, scale_v, self._weight_bits,
-                                        quant_axis)
+        quantized_param_v = quant_tensor(x_dequant, scale_v, quant_axis,
+                                         self._weight_bits)
         return quantized_param_v
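Editor's note: the freeze pass above now applies np.round only on the 'round' path, because adarounded weights already carry their learned integer offset. A small numpy sketch (not part of the commit) contrasting nearest rounding with AdaRound's floor-plus-learned-offset on a toy weight vector; the h_alpha values stand in for a converged alpha:

import numpy as np

w = np.array([0.74, -1.26, 2.31], dtype=np.float32)
scale, bnt = 2.5, (1 << 7) - 1           # 8-bit symmetric, bnt = 127
q = w / scale * bnt                      # quant_tensor without rounding

nearest = np.round(q)                    # round_type='round'
h_alpha = np.array([1.0, 0.0, 1.0])      # hypothetical converged h(alpha) in {0, 1}
adarounded = np.floor(q) + h_alpha       # round_type='adaround'
print(nearest)     # [ 38. -64. 117.]
print(adarounded)  # [ 38. -65. 118.] -- may differ from nearest per weight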
python/paddle/fluid/contrib/slim/quantization/utils.py (new file, mode 100644)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np

_channelwise_quant_axis1_ops = [
    'conv2d_transpose', 'mul', 'matmul', 'matmul_v2'
]


def load_variable_data(scope, var_name):
    '''
    Load variable value from scope
    '''
    var_node = scope.find_var(var_name)
    assert var_node is not None, \
        "Cannot find " + var_name + " in scope."
    return np.array(var_node.get_tensor())


def set_variable_data(scope, place, var_name, np_value):
    '''
    Set the value of the variable found by name, if the variable exists.
    '''
    assert isinstance(np_value, np.ndarray), \
        'The type of value should be numpy array.'
    var_node = scope.find_var(var_name)
    if var_node is not None:
        tensor = var_node.get_tensor()
        tensor.set(np_value, place)


def quant_tensor(x, scale, quant_axis=0, weight_bits=8):
    # symmetric quantization; note: scales to integer range but does not round
    def _clip(x, scale):
        x[x > scale] = scale
        x[x < -scale] = -scale
        return x

    assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
    bnt = (1 << (weight_bits - 1)) - 1
    if isinstance(scale, list):
        for i, s in enumerate(scale):
            if s == 0.0:
                s = 1e-8
            if quant_axis == 0:
                x[i] = _clip(x[i], s)
                x[i] = x[i] / s * bnt
            else:
                x[:, i] = _clip(x[:, i], s)
                x[:, i] = x[:, i] / s * bnt
    else:
        scale = 1e-8 if scale == 0.0 else scale
        x = _clip(x, scale)
        x = x / scale * bnt
    return x


def dequant_tensor(x, scale, quant_axis=0, weight_bits=8):
    assert quant_axis in [0, 1], 'quant_axis should be 0 or 1 for now.'
    bnt = (1 << (weight_bits - 1)) - 1
    if isinstance(scale, list):
        for i, s in enumerate(scale):
            if s == 0.0:
                s = 1e-8
            if quant_axis == 0:
                x[i] = x[i] * s / bnt
            else:
                x[:, i] = x[:, i] * s / bnt
    else:
        scale = 1e-8 if scale == 0.0 else scale
        x = x * scale / bnt
    return x


def stable_sigmoid(x):
    # pick the exp() formulation per sign of x to avoid huge exponentials
    sig = np.where(x < 0, np.exp(x) / (1 + np.exp(x)), 1 / (1 + np.exp(-x)))
    return sig


def calculate_quant_cos_error(orig_tensor, qdq_tensor):
    cos_sim = np.inner(orig_tensor.flatten(), qdq_tensor.flatten()) \
              / (np.linalg.norm(orig_tensor.flatten()) *
                 np.linalg.norm(qdq_tensor.flatten()))
    return cos_sim
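
Editor's note: a quick interactive check of the new helpers (assuming a Paddle build containing this commit so the module imports; not part of the commit itself):

import numpy as np
from paddle.fluid.contrib.slim.quantization.utils import (
    quant_tensor, dequant_tensor, stable_sigmoid)

x = np.array([0.5, -1.5, 3.0], dtype=np.float32)
q = quant_tensor(x.copy(), scale=3.0)      # clip to [-scale, scale], map to [-127, 127]
x_hat = dequant_tensor(q.copy(), scale=3.0)
print(x_hat)  # ~[0.5, -1.5, 3.0]; quant_tensor does not round, so this round-trips

print(stable_sigmoid(np.array([-1000.0, 0.0, 1000.0])))  # [0.  0.5 1.], finite results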
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py (modified)
@@ -167,6 +167,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
                                  model_path,
                                  data_path,
                                  algo="KL",
+                                 round_type="round",
                                  quantizable_op_type=["conv2d"],
                                  is_full_quantize=False,
                                  is_use_cache_file=False,

@@ -186,6 +187,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
             batch_nums=batch_nums,
             algo=algo,
             quantizable_op_type=quantizable_op_type,
+            round_type=round_type,
             is_full_quantize=is_full_quantize,
             optimize_model=is_optimize_model,
             is_use_cache_file=is_use_cache_file)

@@ -193,9 +195,9 @@ class TestPostTrainingQuantization(unittest.TestCase):
         ptq.save_quantized_model(self.int8_model_path)

     def run_test(self, model_name, model_url, model_md5, data_name, data_url,
-                 data_md5, algo, quantizable_op_type, is_full_quantize,
-                 is_use_cache_file, is_optimize_model, diff_threshold,
-                 infer_iterations, quant_iterations):
+                 data_md5, algo, round_type, quantizable_op_type,
+                 is_full_quantize, is_use_cache_file, is_optimize_model,
+                 diff_threshold, infer_iterations, quant_iterations):
         fp32_model_path = self.download_model(model_url, model_md5, model_name)
         fp32_model_path = os.path.join(fp32_model_path, model_name)

@@ -210,9 +212,9 @@ class TestPostTrainingQuantization(unittest.TestCase):
         print("Start post training quantization for {0} on {1} samples ...".
               format(model_name, quant_iterations))
-        self.generate_quantized_model(fp32_model_path, data_path, algo,
-                                      quantizable_op_type, is_full_quantize,
-                                      is_use_cache_file, is_optimize_model,
-                                      quant_iterations)
+        self.generate_quantized_model(fp32_model_path, data_path, algo,
+                                      round_type, quantizable_op_type,
+                                      is_full_quantize, is_use_cache_file,
+                                      is_optimize_model, quant_iterations)

         print("Start INT8 inference for {0} on {1} samples ...".format(
             model_name, infer_iterations))

@@ -239,6 +241,7 @@ class TestPostTrainingKLForMnist(TestPostTrainingQuantization):
         data_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/quant_lstm_input_data.tar.gz"
         data_md5 = "add84c754e9b792fea1fbd728d134ab7"
         algo = "KL"
+        round_type = "round"
         quantizable_op_type = ["mul", "lstm"]
         is_full_quantize = False
         is_use_cache_file = False

@@ -247,9 +250,32 @@ class TestPostTrainingKLForMnist(TestPostTrainingQuantization):
         infer_iterations = 100
         quant_iterations = 10
-        self.run_test(model_name, model_url, model_md5, data_name, data_url,
-                      data_md5, algo, quantizable_op_type, is_full_quantize,
-                      is_use_cache_file, is_optimize_model, diff_threshold,
-                      infer_iterations, quant_iterations)
+        self.run_test(model_name, model_url, model_md5, data_name, data_url,
+                      data_md5, algo, round_type, quantizable_op_type,
+                      is_full_quantize, is_use_cache_file, is_optimize_model,
+                      diff_threshold, infer_iterations, quant_iterations)
+
+
+class TestPostTrainingKLForMnistAdaround(TestPostTrainingQuantization):
+    def test_post_training_kl(self):
+        model_name = "nlp_lstm_fp32_model"
+        model_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/nlp_lstm_fp32_model.tar.gz"
+        model_md5 = "519b8eeac756e7b4b7bcb2868e880452"
+        data_name = "quant_lstm_input_data"
+        data_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/quant_lstm_input_data.tar.gz"
+        data_md5 = "add84c754e9b792fea1fbd728d134ab7"
+        algo = "KL"
+        round_type = "adaround"
+        quantizable_op_type = ["mul", "lstm"]
+        is_full_quantize = False
+        is_use_cache_file = False
+        is_optimize_model = False
+        diff_threshold = 0.01
+        infer_iterations = 100
+        quant_iterations = 10
+        self.run_test(model_name, model_url, model_md5, data_name, data_url,
+                      data_md5, algo, round_type, quantizable_op_type,
+                      is_full_quantize, is_use_cache_file, is_optimize_model,
+                      diff_threshold, infer_iterations, quant_iterations)


 if __name__ == '__main__':
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py (modified)
@@ -110,6 +110,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
     def generate_quantized_model(self,
                                  model_path,
                                  algo="KL",
+                                 round_type="round",
                                  quantizable_op_type=["conv2d"],
                                  is_full_quantize=False,
                                  is_use_cache_file=False,

@@ -130,6 +131,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
             batch_nums=batch_nums,
             algo=algo,
             quantizable_op_type=quantizable_op_type,
+            round_type=round_type,
             is_full_quantize=is_full_quantize,
             optimize_model=is_optimize_model,
             is_use_cache_file=is_use_cache_file)

@@ -141,6 +143,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
                  data_url,
                  data_md5,
                  algo,
+                 round_type,
                  quantizable_op_type,
                  is_full_quantize,
                  is_use_cache_file,

@@ -160,9 +163,10 @@ class TestPostTrainingQuantization(unittest.TestCase):
         print("Start INT8 post training quantization for {0} on {1} images ...".
               format(model_name, quant_iterations * batch_size))
-        self.generate_quantized_model(origin_model_path, algo,
-                                      quantizable_op_type, is_full_quantize,
-                                      is_use_cache_file, is_optimize_model,
-                                      batch_size, quant_iterations)
+        self.generate_quantized_model(origin_model_path, algo, round_type,
+                                      quantizable_op_type, is_full_quantize,
+                                      is_use_cache_file, is_optimize_model,
+                                      batch_size, quant_iterations)

         print("Start INT8 inference for {0} on {1} images ...".format(
             model_name, infer_iterations * batch_size))

@@ -190,6 +194,7 @@ class TestPostTrainingKLForMnist(TestPostTrainingQuantization):
         data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz"
         data_md5 = "be71d3997ec35ac2a65ae8a145e2887c"
         algo = "KL"
+        round_type = "round"
         quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
         is_full_quantize = False
         is_use_cache_file = False

@@ -198,10 +203,10 @@ class TestPostTrainingKLForMnist(TestPostTrainingQuantization):
         batch_size = 10
         infer_iterations = 50
         quant_iterations = 5
-        self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold, batch_size, infer_iterations,
-                      quant_iterations)
+        self.run_test(model_name, data_url, data_md5, algo, round_type,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold, batch_size,
+                      infer_iterations, quant_iterations)


 class TestPostTraininghistForMnist(TestPostTrainingQuantization):

@@ -210,6 +215,7 @@ class TestPostTraininghistForMnist(TestPostTrainingQuantization):
         data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz"
         data_md5 = "be71d3997ec35ac2a65ae8a145e2887c"
         algo = "hist"
+        round_type = "round"
         quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
         is_full_quantize = False
         is_use_cache_file = False

@@ -218,10 +224,10 @@ class TestPostTraininghistForMnist(TestPostTrainingQuantization):
         batch_size = 10
         infer_iterations = 50
         quant_iterations = 5
-        self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold, batch_size, infer_iterations,
-                      quant_iterations)
+        self.run_test(model_name, data_url, data_md5, algo, round_type,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold, batch_size,
+                      infer_iterations, quant_iterations)


 class TestPostTrainingmseForMnist(TestPostTrainingQuantization):

@@ -230,6 +236,7 @@ class TestPostTrainingmseForMnist(TestPostTrainingQuantization):
         data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz"
         data_md5 = "be71d3997ec35ac2a65ae8a145e2887c"
         algo = "mse"
+        round_type = "round"
         quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
         is_full_quantize = False
         is_use_cache_file = False

@@ -238,10 +245,10 @@ class TestPostTrainingmseForMnist(TestPostTrainingQuantization):
         batch_size = 10
         infer_iterations = 50
         quant_iterations = 5
-        self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold, batch_size, infer_iterations,
-                      quant_iterations)
+        self.run_test(model_name, data_url, data_md5, algo, round_type,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold, batch_size,
+                      infer_iterations, quant_iterations)


 class TestPostTrainingemdForMnist(TestPostTrainingQuantization):

@@ -250,6 +257,7 @@ class TestPostTrainingemdForMnist(TestPostTrainingQuantization):
         data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz"
         data_md5 = "be71d3997ec35ac2a65ae8a145e2887c"
         algo = "emd"
+        round_type = "round"
         quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
         is_full_quantize = False
         is_use_cache_file = False

@@ -258,10 +266,10 @@ class TestPostTrainingemdForMnist(TestPostTrainingQuantization):
         batch_size = 10
         infer_iterations = 50
         quant_iterations = 5
-        self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold, batch_size, infer_iterations,
-                      quant_iterations)
+        self.run_test(model_name, data_url, data_md5, algo, round_type,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold, batch_size,
+                      infer_iterations, quant_iterations)


 class TestPostTrainingavgForMnist(TestPostTrainingQuantization):

@@ -270,6 +278,7 @@ class TestPostTrainingavgForMnist(TestPostTrainingQuantization):
         data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz"
         data_md5 = "be71d3997ec35ac2a65ae8a145e2887c"
         algo = "avg"
+        round_type = "round"
         quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
         is_full_quantize = False
         is_use_cache_file = False

@@ -278,10 +287,10 @@ class TestPostTrainingavgForMnist(TestPostTrainingQuantization):
         batch_size = 10
         infer_iterations = 50
         quant_iterations = 5
-        self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold, batch_size, infer_iterations,
-                      quant_iterations)
+        self.run_test(model_name, data_url, data_md5, algo, round_type,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold, batch_size,
+                      infer_iterations, quant_iterations)


 class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization):

@@ -290,6 +299,7 @@ class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization):
         data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz"
         data_md5 = "be71d3997ec35ac2a65ae8a145e2887c"
         algo = "abs_max"
+        round_type = "round"
         quantizable_op_type = ["conv2d", "mul"]
         is_full_quantize = True
         is_use_cache_file = False

@@ -298,10 +308,31 @@ class TestPostTrainingAbsMaxForMnist(TestPostTrainingQuantization):
         batch_size = 10
         infer_iterations = 50
         quant_iterations = 10
-        self.run_test(model_name, data_url, data_md5, algo, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold, batch_size, infer_iterations,
-                      quant_iterations)
+        self.run_test(model_name, data_url, data_md5, algo, round_type,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold, batch_size,
+                      infer_iterations, quant_iterations)
+
+
+class TestPostTrainingmseAdaroundForMnist(TestPostTrainingQuantization):
+    def test_post_training_mse(self):
+        model_name = "mnist_model"
+        data_url = "http://paddle-inference-dist.bj.bcebos.com/int8/mnist_model.tar.gz"
+        data_md5 = "be71d3997ec35ac2a65ae8a145e2887c"
+        algo = "mse"
+        round_type = "adaround"
+        quantizable_op_type = ["conv2d", "depthwise_conv2d", "mul"]
+        is_full_quantize = False
+        is_use_cache_file = False
+        is_optimize_model = True
+        diff_threshold = 0.01
+        batch_size = 10
+        infer_iterations = 50
+        quant_iterations = 5
+        self.run_test(model_name, data_url, data_md5, algo, round_type,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold, batch_size,
+                      infer_iterations, quant_iterations)


 if __name__ == '__main__':
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py (modified)
@@ -240,6 +240,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
                                  model_path,
                                  quantizable_op_type,
                                  algo="KL",
+                                 round_type="round",
                                  is_full_quantize=False,
                                  is_use_cache_file=False,
                                  is_optimize_model=False):

@@ -261,15 +262,16 @@ class TestPostTrainingQuantization(unittest.TestCase):
             model_dir=model_path,
             algo=algo,
             quantizable_op_type=quantizable_op_type,
+            round_type=round_type,
             is_full_quantize=is_full_quantize,
             optimize_model=is_optimize_model,
             is_use_cache_file=is_use_cache_file)
         ptq.quantize()
         ptq.save_quantized_model(self.int8_model)

-    def run_test(self, model, algo, data_urls, data_md5s, quantizable_op_type,
-                 is_full_quantize, is_use_cache_file, is_optimize_model,
-                 diff_threshold):
+    def run_test(self, model, algo, round_type, data_urls, data_md5s,
+                 quantizable_op_type, is_full_quantize, is_use_cache_file,
+                 is_optimize_model, diff_threshold):
         infer_iterations = self.infer_iterations
         batch_size = self.batch_size
         sample_iterations = self.sample_iterations

@@ -285,7 +287,7 @@ class TestPostTrainingQuantization(unittest.TestCase):
               format(model, sample_iterations * batch_size))
         self.generate_quantized_model(model_cache_folder + "/model",
                                       quantizable_op_type, algo,
-                                      is_full_quantize, is_use_cache_file,
-                                      is_optimize_model)
+                                      round_type, is_full_quantize,
+                                      is_use_cache_file, is_optimize_model)

         print("Start INT8 inference for {0} on {1} images ...".format(
             model, infer_iterations * batch_size))

@@ -309,6 +311,7 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_kl_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "KL"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]

@@ -323,15 +326,16 @@ class TestPostTrainingKLForMobilenetv1(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold)
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)


 class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_avg_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "avg"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]

@@ -345,15 +349,16 @@ class TestPostTrainingavgForMobilenetv1(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold)
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)


 class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_hist_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "hist"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]

@@ -367,15 +372,16 @@ class TestPostTraininghistForMobilenetv1(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold)
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)


 class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_abs_max_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "abs_max"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]

@@ -389,15 +395,110 @@ class TestPostTrainingAbsMaxForMobilenetv1(TestPostTrainingQuantization):
         is_optimize_model = False
         # The accuracy diff of post-training quantization (abs_max) may be bigger
         diff_threshold = 0.05
-        self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold)
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)
+
+
+class TestPostTrainingAvgAdaRoundForMobilenetv1(TestPostTrainingQuantization):
+    def test_post_training_adaround_mobilenetv1(self):
+        model = "MobileNet-V1"
+        algo = "avg"
+        round_type = "adaround"
+        data_urls = [
+            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+        ]
+        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        quantizable_op_type = [
+            "conv2d",
+            "depthwise_conv2d",
+            "mul",
+        ]
+        is_full_quantize = False
+        is_use_cache_file = False
+        is_optimize_model = True
+        diff_threshold = 0.025
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)
+
+
+class TestPostTrainingAbsMaxAdaRoundForMobilenetv1(
+        TestPostTrainingQuantization):
+    def test_post_training_adaround_mobilenetv1(self):
+        model = "MobileNet-V1"
+        algo = "abs_max"
+        round_type = "adaround"
+        data_urls = [
+            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+        ]
+        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        quantizable_op_type = [
+            "conv2d",
+            "depthwise_conv2d",
+            "mul",
+        ]
+        is_full_quantize = False
+        is_use_cache_file = False
+        is_optimize_model = True
+        diff_threshold = 0.025
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)
+
+
+class TestPostTraininghistAdaroundForMobilenetv1(TestPostTrainingQuantization):
+    def test_post_training_hist_mobilenetv1(self):
+        model = "MobileNet-V1"
+        algo = "hist"
+        round_type = "adaround"
+        data_urls = [
+            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+        ]
+        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        quantizable_op_type = [
+            "conv2d",
+            "depthwise_conv2d",
+            "mul",
+        ]
+        is_full_quantize = False
+        is_use_cache_file = False
+        is_optimize_model = True
+        diff_threshold = 0.025
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)
+
+
+class TestPostTrainingKLAdaroundForMobilenetv1(TestPostTrainingQuantization):
+    def test_post_training_kl_mobilenetv1(self):
+        model = "MobileNet-V1"
+        algo = "KL"
+        round_type = "adaround"
+        data_urls = [
+            'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
+        ]
+        data_md5s = ['13892b0716d26443a8cdea15b3c6438b']
+        quantizable_op_type = [
+            "conv2d",
+            "depthwise_conv2d",
+            "mul",
+            "pool2d",
+        ]
+        is_full_quantize = False
+        is_use_cache_file = False
+        is_optimize_model = True
+        diff_threshold = 0.025
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)


 class TestPostTrainingEMDForMobilenetv1(TestPostTrainingQuantization):
     def test_post_training_avg_mobilenetv1(self):
         model = "MobileNet-V1"
         algo = "emd"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/mobilenetv1_int8_model.tar.gz'
         ]

@@ -411,9 +512,9 @@ class TestPostTrainingEMDForMobilenetv1(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = True
         diff_threshold = 0.025
-        self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold)
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)


 if __name__ == '__main__':
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py (modified)
@@ -24,6 +24,7 @@ class TestPostTrainingForResnet50(TestPostTrainingQuantization):
     def test_post_training_resnet50(self):
         model = "ResNet-50"
         algo = "min_max"
+        round_type = "round"
         data_urls = [
             'http://paddle-inference-dist.bj.bcebos.com/int8/resnet50_int8_model.tar.gz'
         ]

@@ -33,9 +34,9 @@ class TestPostTrainingForResnet50(TestPostTrainingQuantization):
         is_use_cache_file = False
         is_optimize_model = False
         diff_threshold = 0.025
-        self.run_test(model, algo, data_urls, data_md5s, quantizable_op_type,
-                      is_full_quantize, is_use_cache_file, is_optimize_model,
-                      diff_threshold)
+        self.run_test(model, algo, round_type, data_urls, data_md5s,
+                      quantizable_op_type, is_full_quantize, is_use_cache_file,
+                      is_optimize_model, diff_threshold)


 if __name__ == '__main__':