Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5d8d463c
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5d8d463c
编写于
1月 18, 2021
作者:
C
cc
提交者:
GitHub
1月 18, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Collect weight threshold for lstm op in post_training_quantization (#28701)
* Collect weight threshold of lstm, test=develop
上级
11e78eba
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
311 addition
and
2 deletion
+311
-2
python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
...d/contrib/slim/quantization/post_training_quantization.py
+36
-1
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
...ddle/fluid/contrib/slim/quantization/quantization_pass.py
+16
-0
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+3
-1
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py
.../slim/tests/test_post_training_quantization_lstm_model.py
+256
-0
未找到文件。
python/paddle/fluid/contrib/slim/quantization/post_training_quantization.py
浏览文件 @
5d8d463c
...
@@ -29,6 +29,7 @@ from .quantization_pass import _out_scale_op_list
...
@@ -29,6 +29,7 @@ from .quantization_pass import _out_scale_op_list
from
.quantization_pass
import
_get_op_input_var_names
from
.quantization_pass
import
_get_op_input_var_names
from
.quantization_pass
import
_get_op_output_var_names
from
.quantization_pass
import
_get_op_output_var_names
from
.quantization_pass
import
_get_output_name_index
from
.quantization_pass
import
_get_output_name_index
from
.quantization_pass
import
_get_input_name_index
from
.quantization_pass
import
_channelwise_quant_axis1_ops
from
.quantization_pass
import
_channelwise_quant_axis1_ops
__all__
=
[
'PostTrainingQuantization'
,
'WeightQuantization'
]
__all__
=
[
'PostTrainingQuantization'
,
'WeightQuantization'
]
...
@@ -253,9 +254,11 @@ class PostTrainingQuantization(object):
...
@@ -253,9 +254,11 @@ class PostTrainingQuantization(object):
]
]
self
.
_support_weight_quantize_type
=
[
'abs_max'
,
'channel_wise_abs_max'
]
self
.
_support_weight_quantize_type
=
[
'abs_max'
,
'channel_wise_abs_max'
]
self
.
_support_algo_type
=
[
'KL'
,
'abs_max'
,
'min_max'
]
self
.
_support_algo_type
=
[
'KL'
,
'abs_max'
,
'min_max'
]
self
.
_dynamic_quantize_op_type
=
[
'lstm'
]
self
.
_support_quantize_op_type
=
\
self
.
_support_quantize_op_type
=
\
list
(
set
(
QuantizationTransformPass
.
_supported_quantizable_op_type
+
list
(
set
(
QuantizationTransformPass
.
_supported_quantizable_op_type
+
AddQuantDequantPass
.
_supported_quantizable_op_type
))
AddQuantDequantPass
.
_supported_quantizable_op_type
+
self
.
_dynamic_quantize_op_type
))
# Check inputs
# Check inputs
assert
executor
is
not
None
,
"The executor cannot be None."
assert
executor
is
not
None
,
"The executor cannot be None."
...
@@ -381,6 +384,10 @@ class PostTrainingQuantization(object):
...
@@ -381,6 +384,10 @@ class PostTrainingQuantization(object):
self
.
_save_input_threhold
()
self
.
_save_input_threhold
()
self
.
_save_output_threshold
()
self
.
_save_output_threshold
()
if
any
(
op_type
in
self
.
_quantizable_op_type
for
op_type
in
self
.
_dynamic_quantize_op_type
):
self
.
_collect_dynamic_quantize_op_threshold
(
self
.
_dynamic_quantize_op_type
)
return
self
.
_program
return
self
.
_program
def
save_quantized_model
(
self
,
def
save_quantized_model
(
self
,
...
@@ -776,6 +783,34 @@ class PostTrainingQuantization(object):
...
@@ -776,6 +783,34 @@ class PostTrainingQuantization(object):
for
var_name
in
out_var_names
:
for
var_name
in
out_var_names
:
analysis_and_save_info
(
op
,
var_name
)
analysis_and_save_info
(
op
,
var_name
)
def _collect_dynamic_quantize_op_threshold(self, target_ops_type):
    """
    Record weight thresholds on dynamically quantized ops (e.g. lstm, gru).

    Every op in the program whose type is listed in ``target_ops_type`` is
    visited. For each of its inputs that is a persistable variable, the
    maximum absolute value of the variable data is stored on the op as the
    attribute ``<argname><index>_threshold``, together with the
    ``quantization_type`` and ``bit_length`` attributes.

    Args:
        target_ops_type(list): the op types to collect thresholds for.
    Returns:
        None
    """
    program = self._program
    # Gather the matching ops from every block before touching any of them.
    matched_ops = [
        candidate
        for block_id in range(program.num_blocks)
        for candidate in program.block(block_id).ops
        if candidate.type in target_ops_type
    ]

    quant_type_tag = str("post_" + self._algo).lower()
    persistable_names = _all_persistable_var_names(program)
    for matched_op in matched_ops:
        for input_name in _get_op_input_var_names(matched_op):
            # Only persistable inputs are loaded from the scope here.
            if input_name not in persistable_names:
                continue
            weight = _load_variable_data(self._scope, input_name)
            abs_max = float(np.max(np.abs(weight)))
            slot_name, slot_index = _get_input_name_index(matched_op,
                                                          input_name)
            threshold_attr = slot_name + str(slot_index) + "_threshold"
            matched_op._set_attr(threshold_attr, abs_max)
            matched_op._set_attr("quantization_type", quant_type_tag)
            matched_op._set_attr("bit_length", self._weight_bits)
def
_get_kl_scaling_factor
(
self
,
hist
,
hist_edeges
,
num_quantized_bins
=
255
):
def
_get_kl_scaling_factor
(
self
,
hist
,
hist_edeges
,
num_quantized_bins
=
255
):
'''
'''
Using the KL-divergenc method to get the more precise scaling factor.
Using the KL-divergenc method to get the more precise scaling factor.
...
...
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
浏览文件 @
5d8d463c
...
@@ -120,6 +120,7 @@ _op_real_in_out_name = {
...
@@ -120,6 +120,7 @@ _op_real_in_out_name = {
"hard_swish"
:
[[
"X"
],
[
"Out"
]],
"hard_swish"
:
[[
"X"
],
[
"Out"
]],
"hard_sigmoid"
:
[[
"X"
],
[
"Out"
]],
"hard_sigmoid"
:
[[
"X"
],
[
"Out"
]],
"gru"
:
[[
"Input"
,
"Weight"
],
[
"Hidden"
]],
"gru"
:
[[
"Input"
,
"Weight"
],
[
"Hidden"
]],
"lstm"
:
[[
"Input"
,
"Weight"
],
[
"Hidden"
]],
}
}
_conv_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'conv2d_transpose'
]
_conv_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'conv2d_transpose'
]
...
@@ -144,6 +145,21 @@ def _get_op_input_var_names(op):
...
@@ -144,6 +145,21 @@ def _get_op_input_var_names(op):
return
var_names
return
var_names
def _get_input_name_index(op, input_var_name):
    """
    Locate ``input_var_name`` among the quantizable inputs of ``op``.

    Args:
        op(IrNode|Operator): the op whose inputs are searched.
        input_var_name(str): the variable name to look for.
    Returns:
        A tuple ``(argname, index)`` giving the input slot name and the
        position of the variable inside that slot, or None when the
        variable is not found. If the name occurs more than once, the
        last occurrence is returned.
    """
    assert isinstance(op, (IrNode, Operator)), \
        "The input op should be IrNode or Operator."
    if isinstance(op, IrNode):
        op_type = op.name()
    else:
        op_type = op.type

    found = None
    for slot in _op_real_in_out_name[op_type][0]:
        for position, candidate in enumerate(op.input(slot)):
            if candidate == input_var_name:
                # Deliberately no break: a later match overrides an
                # earlier one.
                found = (slot, position)
    return found
def
_get_op_output_var_names
(
op
):
def
_get_op_output_var_names
(
op
):
""" """
""" """
assert
isinstance
(
op
,
(
IrNode
,
Operator
)),
\
assert
isinstance
(
op
,
(
IrNode
,
Operator
)),
\
...
...
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
浏览文件 @
5d8d463c
...
@@ -124,6 +124,7 @@ if(WIN32)
...
@@ -124,6 +124,7 @@ if(WIN32)
list
(
REMOVE_ITEM TEST_OPS test_post_training_quantization_mnist
)
list
(
REMOVE_ITEM TEST_OPS test_post_training_quantization_mnist
)
list
(
REMOVE_ITEM TEST_OPS test_post_training_quantization_mobilenetv1
)
list
(
REMOVE_ITEM TEST_OPS test_post_training_quantization_mobilenetv1
)
list
(
REMOVE_ITEM TEST_OPS test_post_training_quantization_resnet50
)
list
(
REMOVE_ITEM TEST_OPS test_post_training_quantization_resnet50
)
list
(
REMOVE_ITEM TEST_OPS test_post_training_quantization_lstm_model
)
list
(
REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1
)
list
(
REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1
)
list
(
REMOVE_ITEM TEST_OPS test_quantize_transpiler_v2
)
list
(
REMOVE_ITEM TEST_OPS test_quantize_transpiler_v2
)
endif
()
endif
()
...
@@ -300,8 +301,9 @@ endforeach()
...
@@ -300,8 +301,9 @@ endforeach()
# setting timeout value for old unittests
# setting timeout value for old unittests
if
(
NOT WIN32
)
if
(
NOT WIN32
)
set_tests_properties
(
test_post_training_quantization_lstm_model PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_post_training_quantization_mobilenetv1 PROPERTIES TIMEOUT 400 LABELS
"RUN_TYPE=NIGHTLY"
)
set_tests_properties
(
test_post_training_quantization_mobilenetv1 PROPERTIES TIMEOUT 400 LABELS
"RUN_TYPE=NIGHTLY"
)
set_tests_properties
(
test_post_training_quantization_resnet50 PROPERTIES TIMEOUT 400 LABELS
"RUN_TYPE=NIGHTLY"
)
set_tests_properties
(
test_post_training_quantization_resnet50 PROPERTIES TIMEOUT 400 LABELS
"RUN_TYPE=NIGHTLY"
)
set_tests_properties
(
test_post_training_quantization_mnist PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_post_training_quantization_mnist PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_weight_quantization_mobilenetv1 PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_weight_quantization_mobilenetv1 PROPERTIES TIMEOUT 120
)
endif
()
endif
()
...
...
python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_lstm_model.py
0 → 100644
浏览文件 @
5d8d463c
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
os
import
time
import
sys
import
random
import
math
import
functools
import
contextlib
import
struct
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
from
paddle.dataset.common
import
download
from
paddle.fluid.contrib.slim.quantization
import
PostTrainingQuantization
# Switch Paddle to static mode before any program is loaded or run.
paddle.enable_static()

# Seed both random number generators with a fixed value.
random.seed(0)
np.random.seed(0)
class TestPostTrainingQuantization(unittest.TestCase):
    """
    Shared harness for post-training-quantization tests.

    ``run_test`` downloads a model and a data file, measures FP32 accuracy,
    quantizes the model with ``PostTrainingQuantization``, measures INT8
    accuracy, and asserts that the accuracy drop is below a threshold.
    """

    def setUp(self):
        """Compute cache paths and create the quantized-model directory."""
        self.download_path = 'int8/download'
        self.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
                                               self.download_path)
        self.timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
        self.int8_model_path = os.path.join(os.getcwd(),
                                            "post_training_" + self.timestamp)
        try:
            # os.makedirs raises on failure (os.system never does), so the
            # error branch below is actually reachable.
            if not os.path.isdir(self.int8_model_path):
                os.makedirs(self.int8_model_path)
        except OSError as e:
            print("Failed to create {} due to {}".format(self.int8_model_path,
                                                         str(e)))
            sys.exit(-1)

    def tearDown(self):
        """Remove the quantized-model directory; failures are only logged."""
        # Imported locally so the file-level import block is left untouched.
        import shutil
        try:
            shutil.rmtree(self.int8_model_path)
        except OSError as e:
            print("Failed to delete {} due to {}".format(self.int8_model_path,
                                                         str(e)))

    def cache_unzipping(self, target_folder, zip_path):
        """Extract ``zip_path`` into ``target_folder`` unless it exists."""
        if not os.path.exists(target_folder):
            # NOTE(review): both paths are interpolated unquoted into a
            # shell command; they must not contain spaces or shell
            # metacharacters.
            cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(target_folder,
                                                          zip_path)
            os.system(cmd)

    def download_model(self, data_url, data_md5, folder_name):
        """
        Download an archive and unpack it under the cache folder.

        Args:
            data_url(str): URL of the archive; its last path component is
                used as the local file name.
            data_md5(str): checksum passed to ``download``.
            folder_name(str): sub-folder of the cache to extract into.
        Returns:
            The path of the folder the archive was extracted into.
        """
        download(data_url, self.download_path, data_md5)
        file_name = data_url.split('/')[-1]
        zip_path = os.path.join(self.cache_folder, file_name)
        print('Data is downloaded at {0}'.format(zip_path))

        data_cache_folder = os.path.join(self.cache_folder, folder_name)
        self.cache_unzipping(data_cache_folder, zip_path)
        return data_cache_folder

    @staticmethod
    def _read_records(in_file):
        """
        Yield ``(feat, label)`` numpy pairs parsed from a binary stream.

        Each record is a 4-byte int header whose low 16 bits hold the label
        count and whose next 16 bits hold the sequence length, followed by
        the int32 labels and then ``seq_len * 8`` float32 feature values.
        Records that do not have exactly one label, or whose label is
        greater than 6350, are skipped.
        """
        while True:
            header = in_file.read(4)
            if header is None or len(header) != 4:
                break

            packed_lens = struct.unpack('i', header)[0]
            label_len = packed_lens & 0xFFFF
            seq_len = (packed_lens >> 16) & 0xFFFF

            label_bytes = in_file.read(4 * label_len)
            label = np.frombuffer(
                label_bytes, dtype=np.int32).reshape([len(label_bytes) // 4])

            # Consume the feature payload BEFORE deciding to skip the
            # record; otherwise the next header would be read from the
            # middle of this record's features.
            feat_bytes = in_file.read(4 * seq_len * 8)
            if label.shape[0] != 1 or label[0] > 6350:
                continue

            feat = np.frombuffer(
                feat_bytes,
                dtype=np.float32).reshape([len(feat_bytes) // 4 // 8, 8])
            yield feat, label

    def get_batch_reader(self, data_path, place):
        """
        Build the calibration reader.

        Returns:
            A generator function yielding one-element lists holding the
            LoD tensor of a single sample.
        """

        def reader():
            with open(data_path, 'rb') as in_file:
                for feat, _ in self._read_records(in_file):
                    lod_feat = [feat.shape[0]]
                    minputs = fluid.create_lod_tensor(feat, [lod_feat], place)
                    yield [minputs]

        return reader

    def get_simple_reader(self, data_path, place):
        """
        Build the evaluation reader.

        Returns:
            A generator function yielding ``(lod_tensor, label)`` pairs.
        """

        def reader():
            with open(data_path, 'rb') as in_file:
                for feat, label in self._read_records(in_file):
                    lod_feat = [feat.shape[0]]
                    minputs = fluid.create_lod_tensor(feat, [lod_feat], place)
                    yield minputs, label

        return reader

    def run_program(self, model_path, data_path, infer_iterations):
        """
        Run inference on CPU and measure latency and accuracy.

        Args:
            model_path(str): directory of the inference model to load.
            data_path(str): path of the binary data file.
            infer_iterations(int): number of samples to evaluate.
        Returns:
            A tuple ``(latency, acc)``: the average time of one
            ``exe.run`` call in seconds and the fraction of samples whose
            argmax of the first fetched output equals the label.
        """
        print("test model path:" + model_path)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        [infer_program, feed_dict, fetch_targets] = \
            fluid.io.load_inference_model(model_path, exe)

        val_reader = self.get_simple_reader(data_path, place)

        all_num = 0
        right_num = 0
        periods = []
        for batch_id, (data, label) in enumerate(val_reader()):
            t1 = time.time()
            cls_out, ctc_out = exe.run(infer_program,
                                       feed={feed_dict[0]: data},
                                       fetch_list=fetch_targets,
                                       return_numpy=False)
            t2 = time.time()
            periods.append(t2 - t1)

            cls_out = np.array(cls_out).reshape(-1)
            out_cls_label = np.argmax(cls_out)

            all_num += 1
            if out_cls_label == label[0]:
                right_num += 1

            if (batch_id + 1) == infer_iterations:
                break

        # Fail with a clear message instead of a ZeroDivisionError below.
        self.assertGreater(all_num, 0,
                           "No sample was read from " + data_path)
        latency = np.average(periods)
        acc = right_num / all_num
        return (latency, acc)

    def generate_quantized_model(self,
                                 model_path,
                                 data_path,
                                 algo="KL",
                                 quantizable_op_type=None,
                                 is_full_quantize=False,
                                 is_use_cache_file=False,
                                 is_optimize_model=False,
                                 batch_size=10,
                                 batch_nums=10):
        """
        Quantize the model and save it to ``self.int8_model_path``.

        Args:
            model_path(str): directory of the FP32 inference model.
            data_path(str): path of the calibration data file.
            algo(str): calibration algorithm passed to the quantizer.
            quantizable_op_type(list|None): op types to quantize; None
                means ``["conv2d"]``.
            is_full_quantize(bool): passed through to the quantizer.
            is_use_cache_file(bool): passed through to the quantizer.
            is_optimize_model(bool): passed as ``optimize_model``.
            batch_size(int): unused; kept for signature compatibility.
            batch_nums(int): number of calibration batches.
        """
        if quantizable_op_type is None:
            # Fresh list per call instead of a shared mutable default.
            quantizable_op_type = ["conv2d"]

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.global_scope()
        batch_generator = self.get_batch_reader(data_path, place)

        ptq = PostTrainingQuantization(
            executor=exe,
            model_dir=model_path,
            batch_generator=batch_generator,
            batch_nums=batch_nums,
            algo=algo,
            quantizable_op_type=quantizable_op_type,
            is_full_quantize=is_full_quantize,
            optimize_model=is_optimize_model,
            is_use_cache_file=is_use_cache_file)
        ptq.quantize()
        ptq.save_quantized_model(self.int8_model_path)

    def run_test(self, model_name, model_url, model_md5, data_name, data_url,
                 data_md5, algo, quantizable_op_type, is_full_quantize,
                 is_use_cache_file, is_optimize_model, diff_threshold,
                 infer_iterations, quant_iterations):
        """
        Download model and data, compare FP32 and INT8 accuracy.

        The test fails unless ``fp32_acc - int8_acc`` is less than
        ``diff_threshold``. ``infer_iterations`` is the number of samples
        evaluated; ``quant_iterations`` is the number of calibration
        batches.
        """
        fp32_model_path = self.download_model(model_url, model_md5, model_name)
        fp32_model_path = os.path.join(fp32_model_path, model_name)

        data_path = self.download_model(data_url, data_md5, data_name)
        data_path = os.path.join(data_path, data_name)

        print("Start FP32 inference for {0} on {1} samples ...".format(
            model_name, infer_iterations))
        (fp32_latency, fp32_acc) = self.run_program(fp32_model_path, data_path,
                                                    infer_iterations)

        print("Start post training quantization for {0} on {1} samples ..."
              .format(model_name, quant_iterations))
        # quant_iterations is passed by keyword: positionally it would land
        # in the unused ``batch_size`` slot and never reach the quantizer.
        self.generate_quantized_model(
            fp32_model_path,
            data_path,
            algo,
            quantizable_op_type,
            is_full_quantize,
            is_use_cache_file,
            is_optimize_model,
            batch_nums=quant_iterations)

        print("Start INT8 inference for {0} on {1} samples ...".format(
            model_name, infer_iterations))
        (int8_latency, int8_acc) = self.run_program(self.int8_model_path,
                                                    data_path,
                                                    infer_iterations)

        print("---Post training quantization of {} method---".format(algo))
        print("FP32 {0}: batch_size {1}, latency {2} s, acc {3}.".format(
            model_name, 1, fp32_latency, fp32_acc))
        print("INT8 {0}: batch_size {1}, latency {2} s, acc1 {3}.\n".format(
            model_name, 1, int8_latency, int8_acc))
        sys.stdout.flush()

        delta_value = fp32_acc - int8_acc
        self.assertLess(delta_value, diff_threshold)
class TestPostTrainingKLForMnist(TestPostTrainingQuantization):
    """KL-calibrated post-training quantization of the LSTM test model."""

    def test_post_training_kl(self):
        """Quantize the ``mul`` and ``lstm`` ops and check the accuracy drop."""
        self.run_test(
            model_name="nlp_lstm_fp32_model",
            model_url="https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/nlp_lstm_fp32_model.tar.gz",
            model_md5="519b8eeac756e7b4b7bcb2868e880452",
            data_name="quant_lstm_input_data",
            data_url="https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/quant_lstm_input_data.tar.gz",
            data_md5="add84c754e9b792fea1fbd728d134ab7",
            algo="KL",
            quantizable_op_type=["mul", "lstm"],
            is_full_quantize=False,
            is_use_cache_file=False,
            is_optimize_model=False,
            # Largest accepted value of fp32_acc - int8_acc.
            diff_threshold=0.01,
            infer_iterations=100,
            quant_iterations=10)
# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录