PaddlePaddle / PaddleSlim
Commit be558e8d (unverified)
Authored Jan 12, 2023 by Guanghua Yu; committed via GitHub on Jan 12, 2023
Add deploy_backend in PTQ and QAT & update to the default onnx new format (#1601)
Parent: e682f51b
Showing 10 changed files with 138 additions and 115 deletions (+138 -115)
paddleslim/auto_compression/compressor.py          +1   -2
paddleslim/auto_compression/utils/fake_ptq.py      +10  -5
paddleslim/dygraph/quant/qat.py                    +2   -2
paddleslim/quant/quant_aware_with_infermodel.py    +1   -3
paddleslim/quant/quanter.py                        +65  -28
paddleslim/quant/reconstruction_quantization.py    +2   -2
tests/test_quant_aware.py                          +53  -68
tests/test_quant_aware_user_defined.py             +1   -2
tests/test_quant_aware_with_infermodel.py          +2   -1
tests/test_quant_post_quant_aware.py               +1   -2
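At the call-site level, this commit makes quant_post_static default to the ONNX-style (quantize_linear/dequantize_linear) format and adds a deploy_backend argument (see the paddleslim/quant/quanter.py hunks below). A minimal sketch of a post-training quantization call that exercises both options; the model paths and the calibration reader are placeholders rather than part of this commit, and only onnx_format and deploy_backend are the parameters it touches:

import numpy as np
import paddle
from paddleslim.quant import quant_post_static

paddle.enable_static()
place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace()
exe = paddle.static.Executor(place)

def calib_reader():
    # Placeholder calibration reader; a real one yields preprocessed input samples.
    for _ in range(16):
        yield [np.random.random((1, 3, 224, 224)).astype('float32')]

quant_post_static(
    executor=exe,
    model_dir='./mobilenet_fp32_infer',      # placeholder: exported FP32 inference model
    quantize_model_path='./mobilenet_int8',  # placeholder: output directory
    sample_generator=calib_reader,
    onnx_format=True,                        # new default introduced by this commit
    deploy_backend='tensorrt')               # new argument: None, 'tensorrt', 'mkldnn' or 'arm'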
paddleslim/auto_compression/compressor.py
@@ -890,8 +890,7 @@ class AutoCompression:
                 test_program,
                 self._places,
                 self._quant_config,
-                scope=paddle.static.global_scope(),
-                save_clip_ranges_path=self.final_dir)
+                scope=paddle.static.global_scope())
             feed_vars = [
                 test_program.global_block().var(name)

paddleslim/auto_compression/utils/fake_ptq.py
@@ -3,14 +3,17 @@ import paddle
 from paddle.fluid.framework import IrGraph
 from paddle.framework import core
 from paddle.static.quantization import QuantizationTransformPass, QuantizationTransformPassV2, AddQuantDequantPass, AddQuantDequantPassV2, QuantizationFreezePass, QuantWeightPass
-from paddle.static.quantization import utils
 try:
+    from paddle.static.quantization import utils
+    from paddle.static.quantization import quant_config
+    TRANSFORM_PASS_OP_TYPES = list(
+        quant_config.SUPPORT_WEIGHT_QUANTIZATION_OP_DICT.keys())
+    QUANT_DEQUANT_PASS_OP_TYPES = list(
+        quant_config.SUPPORT_ACT_QUANTIZATION_OP_DICT.keys())
+except:
     TRANSFORM_PASS_OP_TYPES = utils._weight_supported_quantizable_op_type
     QUANT_DEQUANT_PASS_OP_TYPES = utils._act_supported_quantizable_op_type
-except:
-    TRANSFORM_PASS_OP_TYPES = QuantizationTransformPass._supported_quantizable_op_type
-    QUANT_DEQUANT_PASS_OP_TYPES = AddQuantDequantPass._supported_quantizable_op_type
 from ...common.load_model import load_inference_model

@@ -155,7 +158,9 @@ def post_quant_fake(executor,
     for block_id in range(len(_program.blocks)):
         for op in _program.blocks[block_id].ops:
-            if op.type in (_quantizable_op_type + utils._out_scale_op_list):
+            if op.type in (
+                    _quantizable_op_type +
+                    list(quant_config.SUPPORT_QUANTIZATION_OP_DICT.keys())):
                 out_var_names = utils._get_op_output_var_names(op)
                 for var_name in out_var_names:
                     analysis_and_save_info(op, var_name)

paddleslim/dygraph/quant/qat.py
@@ -57,8 +57,8 @@ _quant_config_default = {
     'quantizable_layer_type': ['Conv2D', 'Linear'],
     # whether fuse conv and bn before QAT
     'fuse_conv_bn': False,
-    # Whether to export the quantized model with format of ONNX. Default is False.
-    'onnx_format': False,
+    # Whether to export the quantized model with format of ONNX. Default is True.
+    'onnx_format': True,
 }

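The dygraph QAT default above now produces the ONNX-style export. A hedged sketch of a config dict carrying that key, passed to PaddleSlim's existing QAT helper; the model, save path and input spec are placeholders, and QAT/quantize/save_quantized_model are assumed from the pre-existing dygraph API rather than changed by this commit:

import paddle
from paddleslim import QAT

quant_config = {
    'quantizable_layer_type': ['Conv2D', 'Linear'],
    'fuse_conv_bn': False,
    'onnx_format': True,  # new default; set False to keep the legacy fake-quant format
}

model = paddle.vision.models.mobilenet_v1(num_classes=10)  # placeholder model
quanter = QAT(config=quant_config)
quant_model = quanter.quantize(model)
# ... run normal training on quant_model here ...
quanter.save_quantized_model(
    quant_model,
    './qat_mobilenet_infer',  # placeholder save path
    input_spec=[paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')])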
paddleslim/quant/quant_aware_with_infermodel.py
@@ -311,9 +311,7 @@ def export_quant_infermodel(
     # operators' order for the inference.
     # The dtype of float_program's weights is float32, but in int8 range.
     ############################################################################################################
-    float_program, int8_program = convert(test_program, place, quant_config, \
-                                          scope=scope, \
-                                          save_int8=True)
+    float_program = convert(test_program, place, quant_config, scope=scope)
     ############################################################################################################
     # 4. Save inference model
     ############################################################################################################

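Since the ONNX-format convert path already writes int8 weights, the export step above keeps only the single returned program. A sketch of the save step that follows it, reusing test_program, place, quant_config, scope and an executor exe from the surrounding function; the output prefix and the feed/fetch variable names are placeholders:

float_program = convert(test_program, place, quant_config, scope=scope)
paddle.static.save_inference_model(
    path_prefix='./quant_infermodel',  # placeholder output prefix
    feed_vars=[float_program.global_block().var('image')],             # placeholder feed name
    fetch_vars=[float_program.global_block().var('softmax_0.tmp_0')],  # placeholder fetch name
    executor=exe,
    program=float_program)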
paddleslim/quant/quanter.py
@@ -41,9 +41,10 @@ try:
     from paddle.static.quantization import AddQuantDequantPassV2
     from paddle.static.quantization import PostTrainingQuantizationProgram
     from paddle.static.quantization import AddQuantDequantForInferencePass
+    from paddle.static.quantization import quant_config
 except:
     _logger.warning(
-        "Some functions fail to import, please update PaddlePaddle version to 2.4+"
+        "Some functions failed to import, better to update PaddlePaddle to the latest develop version."
     )

 WEIGHT_QUANTIZATION_TYPES = [

@@ -61,12 +62,14 @@ ACTIVATION_QUANTIZATION_TYPES_TENSORRT = [
 VALID_DTYPES = ['int8']
 try:
+    TRANSFORM_PASS_OP_TYPES = list(
+        quant_config.SUPPORT_WEIGHT_QUANTIZATION_OP_DICT.keys())
+    QUANT_DEQUANT_PASS_OP_TYPES = list(
+        quant_config.SUPPORT_ACT_QUANTIZATION_OP_DICT.keys())
+except:
+    from paddle.static.quantization import utils
     TRANSFORM_PASS_OP_TYPES = utils._weight_supported_quantizable_op_type
     QUANT_DEQUANT_PASS_OP_TYPES = utils._act_supported_quantizable_op_type
-except:
-    TRANSFORM_PASS_OP_TYPES = QuantizationTransformPass._supported_quantizable_op_type
-    QUANT_DEQUANT_PASS_OP_TYPES = AddQuantDequantPass._supported_quantizable_op_type

 TENSORRT_OP_TYPES = [
     'mul', 'conv2d', 'pool2d', 'depthwise_conv2d', 'elementwise_add',

@@ -99,11 +102,13 @@ _quant_config_default = {
     # if True, 'quantoze_op_types' will be TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES
     'is_full_quantize': False,
     # if True, use onnx format to quant.
-    'onnx_format': False,
+    'onnx_format': True,
     # quant post to get initial scale for quant_aware
     'quant_post_first': False,
     # whether scale can be train
-    'scale_trainable': True
+    'scale_trainable': True,
+    # Deploy backend, it could be: None, TensorRT, MKLDNN, ARM
+    'deploy_backend': None
 }

@@ -195,6 +200,32 @@ def _parse_configs(user_config):
     assert isinstance(configs['moving_rate'], float), \
         "moving_rate must be float value, The decay coefficient of moving average, default is 0.9."

+    deploy_backend = configs['deploy_backend']
+    assert not deploy_backend or deploy_backend.lower() in [
+        'tensorrt', 'mkldnn', 'arm'
+    ], "Deploy Backend {} not support, please choose None, tensorrt or mkldnn.".format(
+        deploy_backend)
+    try:
+        if not deploy_backend:
+            configs['quant_config'] = quant_config.BaseQuantizer(
+                quantizable_op_type=configs['quantize_op_types'],
+                quant_bits=configs['weight_bits'], )
+        elif deploy_backend.lower() == "tensorrt":
+            configs['quant_config'] = quant_config.TensorRTQuantizer(
+                quantizable_op_type=configs['quantize_op_types'],
+                quant_bits=configs['weight_bits'], )
+        elif deploy_backend.lower() == "mkldnn":
+            configs['quant_config'] = quant_config.MKLDNNQuantizer(
+                quantizable_op_type=configs['quantize_op_types'],
+                quant_bits=configs['weight_bits'], )
+        elif deploy_backend.lower() == "arm":
+            configs['quant_config'] = quant_config.ARMCPUQuantizer(
+                quantizable_op_type=configs['quantize_op_types'],
+                quant_bits=configs['weight_bits'], )
+    except:
+        _logger.warning(
+            "Set deploy_backend failed, Please update to PaddlePaddle Develop.")
+
     return configs

@@ -388,6 +419,12 @@ def quant_aware(program,
     sub_graphs = [sub_graph for sub_graph in main_graph.all_sub_graphs()]
     transform_pass_ops = []
     quant_dequant_ops = []
+    if 'quant_config' in config and config['quant_config']:
+        transform_pass_ops = config['quant_config'].weight_quant_operation_types
+        quant_dequant_ops = config['quant_config'].activation_quant_operation_types
+    else:
+        for op_type in config['quantize_op_types']:
+            if op_type in TRANSFORM_PASS_OP_TYPES:
+                transform_pass_ops.append(op_type)

@@ -486,8 +523,8 @@ def quant_post_static(executor,
                       hist_percent=0.9999,
                       bias_correction=False,
-                      quantizable_op_type=[
-                          "conv2d", "depthwise_conv2d", "mul", "matmul", "matmul_v2"
-                      ],
+                      quantizable_op_type=[
+                          "conv2d", "depthwise_conv2d", "conv2d_transpose", "mul",
+                          "matmul", "matmul_v2"
+                      ],
                       is_full_quantize=False,
                       weight_bits=8,

@@ -495,10 +532,9 @@ def quant_post_static(executor,
                       activation_quantize_type='range_abs_max',
                       weight_quantize_type='channel_wise_abs_max',
                       optimize_model=False,
-                      onnx_format=False,
+                      onnx_format=True,
                       skip_tensor_list=None,
                       is_use_cache_file=False,
-                      cache_dir="./temp_post_training"):
+                      cache_dir="./temp_post_training",
+                      deploy_backend=None):
     """
     The function utilizes static post training quantization method to
     quantize the fp32 model. It uses calibrate data to calculate the

@@ -568,10 +604,11 @@ def quant_post_static(executor,
         optimize_model(bool, optional): If set optimize_model as True, it applies some
             passes to optimize the model before quantization. So far, the place of
             executor must be cpu it supports fusing batch_norm into convs.
-        onnx_format(bool): Whether to export the quantized model with format of ONNX. Default is False.
+        onnx_format(bool): Whether to export the quantized model with format of ONNX. Default is True.
         skip_tensor_list(list): List of skip quant tensor name.
         is_use_cache_file(bool): This param is deprecated.
         cache_dir(str): This param is deprecated.
+        deploy_backend(str): Deploy backend, it could be None, TensorRT, MKLDNN, ARM.
+            Other backends will continue to expand, the default is None, which means to
+            use the default general quantization configuration.
     Returns:
         None

@@ -599,8 +636,10 @@ def quant_post_static(executor,
             activation_quantize_type=activation_quantize_type,
             weight_quantize_type=weight_quantize_type,
             onnx_format=onnx_format,
-            skip_tensor_list=skip_tensor_list,  # support in Paddle >= 2.3.1
-            optimize_model=optimize_model)
+            skip_tensor_list=skip_tensor_list,
+            optimize_model=optimize_model,
+            deploy_backend=deploy_backend,  # support at Paddle develop
+        )
     except:
         post_training_quantization = PostTrainingQuantization(
             executor=executor,

@@ -624,6 +663,7 @@ def quant_post_static(executor,
             activation_quantize_type=activation_quantize_type,
             weight_quantize_type=weight_quantize_type,
             onnx_format=onnx_format,
             skip_tensor_list=skip_tensor_list,
+            optimize_model=optimize_model)
     post_training_quantization.quantize()

@@ -639,12 +679,7 @@ def quant_post_static(executor,
 quant_post = quant_post_static


-def convert(program,
-            place,
-            config=None,
-            scope=None,
-            save_int8=False,
-            save_clip_ranges_path='./'):
+def convert(program, place, config=None, scope=None, save_int8=False):
     """
     convert quantized and well-trained ``program`` to final quantized
     ``program``that can be used to save ``inference model``.

@@ -666,7 +701,6 @@ def convert(program,
         save_int8: Whether to return ``program`` which model parameters'
             dtype is ``int8``. This parameter can only be used to
             get model size. Default: ``False``.
-        save_clip_ranges_path: If config.onnx_format=True, quantization clip ranges will be saved locally.
     Returns:
         Tuple : freezed program which can be used for inference.

@@ -732,6 +766,9 @@ def convert(program,
                     persistables.extend(_op.input('X'))
                 _op.desc.set_input("X", persistables)
+    assert not (save_int8 and config['onnx_format']), \
+        "When onnx_format=True, already saved int8 weight,so you can't set save_int8=True."
+
     if save_int8:
         convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
         for sub_graph in test_graph.all_sub_graphs():

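The _parse_configs hunk above maps deploy_backend onto a backend-specific quantizer config (BaseQuantizer, TensorRTQuantizer, MKLDNNQuantizer or ARMCPUQuantizer) when the installed Paddle exposes paddle.static.quantization.quant_config, and quant_aware then takes its op types from that config. A hedged sketch of a quant-aware training config exercising the new key; train_prog and place are assumed to come from the usual static-graph setup (as in tests/test_quant_aware.py below):

from paddleslim.quant import quant_aware

config = {
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
    'onnx_format': True,           # default after this commit
    'deploy_backend': 'tensorrt',  # new key: None, 'tensorrt', 'mkldnn' or 'arm'
}
quant_train_prog = quant_aware(train_prog, place, config, for_test=False)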
paddleslim/quant/reconstruction_quantization.py
@@ -180,10 +180,10 @@ class ReconstructionQuantization(PostTrainingQuantization):
         # save out_threshold for quantized ops.
         self._save_output_threshold()
-        if any(op_type in self._quantizable_op_type
+        if any(op_type in self.quant_config.activation_quant_operation_types
                for op_type in self._dynamic_quantize_op_type):
             self._collect_dynamic_quantize_op_threshold(
-                self._dynamic_quantize_op_type, )
+                self._dynamic_quantize_op_type)
         # Move sub blocks persistable var to global block
         global_block = self._program.global_block()

tests/test_quant_aware.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
+sys.path.append("../")
 import unittest

@@ -23,70 +24,7 @@ from layers import conv_bn_layer
 import numpy as np


-class TestQuantAwareCase1(StaticCase):
-    def get_model(self):
-        image = paddle.static.data(
-            name='image', shape=[None, 1, 28, 28], dtype='float32')
-        label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
-        model = MobileNet()
-        out = model.net(input=image, class_dim=10)
-        cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
-        avg_cost = paddle.mean(x=cost)
-        startup_prog = paddle.static.default_startup_program()
-        train_prog = paddle.static.default_main_program()
-        return startup_prog, train_prog
-
-    def get_op_number(self, prog):
-        graph = paddle.fluid.framework.IrGraph(
-            paddle.framework.core.Graph(prog.desc), for_test=False)
-        quant_op_nums = 0
-        op_nums = 0
-        for op in graph.all_op_nodes():
-            if op.name() in ['conv2d', 'depthwise_conv2d', 'mul']:
-                op_nums += 1
-            elif 'fake_' in op.name():
-                quant_op_nums += 1
-        return op_nums, quant_op_nums
-
-    def test_quant_op(self):
-        startup_prog, train_prog = self.get_model()
-        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
-        ) else paddle.CPUPlace()
-        exe = paddle.static.Executor(place)
-        exe.run(startup_prog)
-        config_1 = {
-            'weight_quantize_type': 'channel_wise_abs_max',
-            'activation_quantize_type': 'moving_average_abs_max',
-            'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
-        }
-        quant_prog_1 = quant_aware(train_prog, place, config=config_1, for_test=True)
-        op_nums_1, quant_op_nums_1 = self.get_op_number(quant_prog_1)
-        convert_prog_1 = convert(quant_prog_1, place, config=config_1)
-        convert_op_nums_1, convert_quant_op_nums_1 = self.get_op_number(convert_prog_1)
-
-        config_1['not_quant_pattern'] = ['last_fc']
-        quant_prog_2 = quant_aware(train_prog, place, config=config_1, for_test=True)
-        op_nums_2, quant_op_nums_2 = self.get_op_number(quant_prog_2)
-        convert_prog_2 = convert(quant_prog_2, place, config=config_1)
-        convert_op_nums_2, convert_quant_op_nums_2 = self.get_op_number(convert_prog_2)
-
-        self.assertTrue(op_nums_1 == op_nums_2)
-        # test quant_aware op numbers
-        self.assertTrue(op_nums_1 * 4 == quant_op_nums_1)
-        # test convert op numbers
-        self.assertTrue(convert_op_nums_1 * 2 == convert_quant_op_nums_1)
-        # test skip_quant
-        self.assertTrue(quant_op_nums_1 - 4 == quant_op_nums_2)
-        self.assertTrue(convert_quant_op_nums_1 - 2 == convert_quant_op_nums_2)
-
-
-class TestQuantAwareCase2(StaticCase):
+class TestQuantAwareCase(StaticCase):
     def test_accuracy(self):
         image = paddle.static.data(
             name='image', shape=[None, 1, 28, 28], dtype='float32')

@@ -103,7 +41,7 @@ class TestQuantAwareCase2(StaticCase):
             weight_decay=paddle.regularizer.L2Decay(4e-5))
         optimizer.minimize(avg_cost)
         main_prog = paddle.static.default_main_program()
-        val_prog = main_prog.clone(for_test=True)
+        val_prog = paddle.static.default_main_program().clone(for_test=True)
         place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
         ) else paddle.CPUPlace()

@@ -173,14 +111,61 @@ class TestQuantAwareCase2(StaticCase):
         }
         quant_train_prog = quant_aware(main_prog, place, config, for_test=False)
         quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
+        op_nums_1, quant_op_nums_1 = self.get_op_number(quant_eval_prog)
+        # test quant_aware op numbers
+        self.assertTrue(op_nums_1 * 2 == quant_op_nums_1)
+
         train(quant_train_prog)
-        quant_eval_prog, int8_prog = convert(
-            quant_eval_prog, place, config, save_int8=True)
-        top1_2, top5_2 = test(quant_eval_prog)
+        convert_eval_prog = convert(quant_eval_prog, place, config)
+        top1_2, top5_2 = test(convert_eval_prog)
         # values before quantization and after quantization should be close
         print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
         print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
+
+        convert_op_nums_1, convert_quant_op_nums_1 = self.get_convert_op_number(
+            convert_eval_prog)
+        # test convert op numbers
+        self.assertTrue(convert_op_nums_1 + 25 == convert_quant_op_nums_1)
+
+        config['not_quant_pattern'] = ['last_fc']
+        quant_prog_2 = quant_aware(main_prog, place, config=config, for_test=True)
+        op_nums_2, quant_op_nums_2 = self.get_op_number(quant_prog_2)
+        convert_prog_2 = convert(quant_prog_2, place, config=config)
+        convert_op_nums_2, convert_quant_op_nums_2 = self.get_convert_op_number(
+            convert_prog_2)
+
+        self.assertTrue(op_nums_1 == op_nums_2)
+        # test skip_quant
+        self.assertTrue(quant_op_nums_1 - 2 == quant_op_nums_2)
+        self.assertTrue(convert_quant_op_nums_1 == convert_quant_op_nums_2)
+
+    def get_op_number(self, prog):
+        graph = paddle.fluid.framework.IrGraph(
+            paddle.framework.core.Graph(prog.desc), for_test=False)
+        quant_op_nums = 0
+        op_nums = 0
+        for op in graph.all_op_nodes():
+            if op.name() in ['conv2d', 'depthwise_conv2d', 'mul']:
+                op_nums += 1
+            elif op.name() == 'quantize_linear':
+                quant_op_nums += 1
+        return op_nums, quant_op_nums
+
+    def get_convert_op_number(self, prog):
+        graph = paddle.fluid.framework.IrGraph(
+            paddle.framework.core.Graph(prog.desc), for_test=True)
+        quant_op_nums = 0
+        op_nums = 0
+        dequant_num = 0
+        for op in graph.all_op_nodes():
+            if op.name() not in ['quantize_linear', 'dequantize_linear']:
+                op_nums += 1
+            elif op.name() == 'quantize_linear':
+                quant_op_nums += 1
+        return op_nums, quant_op_nums
+
+
 if __name__ == '__main__':
     unittest.main()

tests/test_quant_aware_user_defined.py
@@ -153,8 +153,7 @@ class TestQuantAwareCase1(StaticCase):
         quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
         train(quant_train_prog_pact)
-        quant_eval_prog, int8_prog = convert(
-            quant_eval_prog, place, config, save_int8=True)
+        quant_eval_prog = convert(quant_eval_prog, place, config)
         top1_2, top5_2 = test(quant_eval_prog)
         # values before quantization and after quantization should be close
         print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))

tests/test_quant_aware_with_infermodel.py
@@ -130,7 +130,8 @@ class TestQuantAwareWithInferModelCase1(StaticCase):
             'weight_quantize_type': 'channel_wise_abs_max',
             'activation_quantize_type': 'moving_average_abs_max',
             'not_quant_pattern': ['skip_quant'],
-            'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul']
+            'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
+            'onnx_format': False
         }
         train_config = {
             "num_epoch": 1,  # training epoch num

tests/test_quant_post_quant_aware.py
@@ -143,8 +143,7 @@ class TestQuantPostQuantAwareCase1(StaticCase):
             scale_dict=scale_dict,
             model_type='transformer')
         train(quant_train_prog)
-        quant_eval_prog, int8_prog = convert(
-            quant_eval_prog, place, config, save_int8=True)
+        quant_eval_prog = convert(quant_eval_prog, place, config)
         top1_2 = test(quant_eval_prog)
         # values before quantization and after quantization should be close
         print("before quantization: top1: {}".format(top1_1))