Commit 5050e761 (unverified) in Crayon鑫 / Paddle (forked from PaddlePaddle / Paddle)
Authored by Bai Yifan on Nov 18, 2020; committed via GitHub on Nov 18, 2020.
Parent commit: 11e32baf

Support user-defined activation/weight quantize and preprocess. (#28570)

* support user-defined quant and preprocess
Changes: 3 changed files, with 373 additions and 39 deletions (+373 -39)

- python/paddle/fluid/contrib/slim/quantization/imperative/qat.py (+43 -3)
- python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py (+82 -36)
- python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py (+248 -0, new file)
python/paddle/fluid/contrib/slim/quantization/imperative/qat.py

@@ -59,7 +59,11 @@ class ImperativeQuantAware(object):
                  weight_quantize_type='abs_max',
                  activation_quantize_type='moving_average_abs_max',
                  moving_rate=0.9,
-                 quantizable_layer_type=['Conv2D', 'Linear']):
+                 quantizable_layer_type=['Conv2D', 'Linear'],
+                 weight_preprocess_layer=None,
+                 act_preprocess_layer=None,
+                 weight_quantize_layer=None,
+                 act_quantize_layer=None):
         """
         The constructor for ImperativeQuantAware.

@@ -81,7 +85,28 @@ class ImperativeQuantAware(object):
             quantizable_op_type(list[str]): List the type of layers that will be quantized.
                 Default is ['Conv2D', 'Linear']. The quantizable_op_type in
                 QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
+            weight_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess
+                weight before quantization. Using this can quickly test if user's
+                preprocess method works or not. The input is non-quantized
+                weight and function returns processed weight to be quantized.
+                If None, the weight will be quantized directly. Default is None.
+            act_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess
+                activation before quantization. Using this can quickly test if user's
+                preprocess method works or not. The input is non-quantized
+                activation and function returns processed activation to be quantized.
+                If None, the activation will be quantized directly. Default is None.
+            weight_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to quantize weight.
+                Using this can quickly test if user's quantization method works or not.
+                In this layer, user should both define quantization method and
+                dequantization method, that is, the function's input is non-quantized
+                weight and returns dequantized weight. If None, will use
+                quantization op defined by 'weight_quantize_type'. Default is None.
+            act_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to quantize activation.
+                Using this can quickly test if user's quantization method works or not.
+                In this layer, user should both define quantization method and
+                dequantization method, that is, the function's input is non-quantized
+                activation and returns dequantized activation. If None, will use
+                quantization op defined by 'activation_quantize_type'. Default is None.
         Examples:
         .. code-block:: python

@@ -118,6 +143,19 @@ class ImperativeQuantAware(object):
         self._activation_bits = activation_bits
         self._moving_rate = moving_rate
+        self._weight_pre_layer = weight_preprocess_layer
+        self._act_pre_layer = act_preprocess_layer
+        self._weight_quant_layer = weight_quantize_layer
+        self._act_quant_layer = act_quantize_layer
+
+        t_check = lambda method: method is None or issubclass(method, dygraph.layers.Layer)
+        assert t_check(self._weight_pre_layer), "weight_preprocess should be nn.Layer"
+        assert t_check(self._act_pre_layer), "act_preprocess should be nn.Layer"
+        assert t_check(self._weight_quant_layer), "weight_quantize should be nn.Layer"
+        assert t_check(self._act_quant_layer), "act_quantize should be nn.Layer"
+
         quant_type = {'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max'}

@@ -189,7 +227,9 @@ class ImperativeQuantAware(object):
         quantized_layer = quant_nn.__dict__[quantized_counterpart[index]](
             layer, self._weight_bits, self._activation_bits, self._moving_rate,
-            self._weight_quantize_type, self._activation_quantize_type)
+            self._weight_quantize_type, self._activation_quantize_type,
+            self._weight_pre_layer, self._act_pre_layer,
+            self._weight_quant_layer, self._act_quant_layer)
         return quantized_layer
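For reference, a minimal usage sketch of the new constructor arguments (not part of this commit; MyWeightPreprocess and MyFakeQuant are hypothetical user layers, and the call pattern follows the docstring and the assertions above, which expect nn.Layer subclasses passed as classes rather than instances):

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

class MyWeightPreprocess(paddle.nn.Layer):
    # Receives the float weight, returns the (still float) tensor to be quantized.
    def forward(self, w):
        return paddle.clip(w, -1.0, 1.0)

class MyFakeQuant(paddle.nn.Layer):
    # Must quantize *and* dequantize: float tensor in, fake-quantized float tensor out.
    def forward(self, x):
        scale = paddle.max(paddle.abs(x)) + 1e-8
        return paddle.round(x / scale * 127.0) * scale / 127.0

imperative_qat = ImperativeQuantAware(
    weight_preprocess_layer=MyWeightPreprocess,  # classes, not instances
    weight_quantize_layer=MyFakeQuant,
    act_quantize_layer=MyFakeQuant)
# imperative_qat.quantize(model)  # model: any dygraph model containing Conv2D/Linear layers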
python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py

@@ -332,7 +332,11 @@ class QuantizedConv2D(layers.Layer):
                  activation_bits=8,
                  moving_rate=0.9,
                  weight_quantize_type='abs_max',
-                 activation_quantize_type='abs_max'):
+                 activation_quantize_type='abs_max',
+                 weight_pre_layer=None,
+                 act_pre_layer=None,
+                 weight_quant_layer=None,
+                 act_quant_layer=None):
         super(QuantizedConv2D, self).__init__()
         # For Conv2D
         self._groups = getattr(layer, '_groups')

@@ -347,26 +351,44 @@ class QuantizedConv2D(layers.Layer):
         self.bias = getattr(layer, 'bias')
         # For FakeQuant
         self._conv2d_quant_axis = 0
-        self._fake_quant_weight = _get_fake_quant_type(
-            weight_quantize_type,
-            name=self.weight.name,
-            moving_rate=moving_rate,
-            quant_bits=weight_bits,
-            dtype=self._dtype,
-            quant_on_weight=True,
-            channel_num=self.weight.shape[self._conv2d_quant_axis],
-            quant_axis=self._conv2d_quant_axis)
-        self._fake_quant_input = _get_fake_quant_type(
-            activation_quantize_type,
-            name=layer.full_name(),
-            moving_rate=moving_rate,
-            quant_bits=activation_bits,
-            dtype=self._dtype,
-            quant_on_weight=False)
+        if weight_quant_layer is not None:
+            self._fake_quant_weight = weight_quant_layer()
+        else:
+            self._fake_quant_weight = _get_fake_quant_type(
+                weight_quantize_type,
+                name=self.weight.name,
+                moving_rate=moving_rate,
+                quant_bits=weight_bits,
+                dtype=self._dtype,
+                quant_on_weight=True,
+                channel_num=self.weight.shape[self._conv2d_quant_axis],
+                quant_axis=self._conv2d_quant_axis)
+        if act_quant_layer is not None:
+            self._fake_quant_input = act_quant_layer()
+        else:
+            self._fake_quant_input = _get_fake_quant_type(
+                activation_quantize_type,
+                name=layer.full_name(),
+                moving_rate=moving_rate,
+                quant_bits=activation_bits,
+                dtype=self._dtype,
+                quant_on_weight=False)
+
+        self._act_preprocess = act_pre_layer() if act_pre_layer is not None else None
+        self._weight_preprocess = weight_pre_layer() if weight_pre_layer is not None else None

     def forward(self, input):
+        if self._act_preprocess is not None:
+            input = self._act_preprocess(input)
         quant_input = self._fake_quant_input(input)
-        quant_weight = self._fake_quant_weight(self.weight)
+
+        weight = self.weight
+        if self._weight_preprocess is not None:
+            weight = self._weight_preprocess(self.weight)
+        quant_weight = self._fake_quant_weight(weight)

         if in_dygraph_mode() and self._l_type == 'conv2d':
             attrs = ('strides', self._stride, 'paddings', self._padding,

@@ -428,7 +450,11 @@ class QuantizedLinear(layers.Layer):
                  activation_bits=8,
                  moving_rate=0.9,
                  weight_quantize_type='abs_max',
-                 activation_quantize_type='abs_max'):
+                 activation_quantize_type='abs_max',
+                 weight_pre_layer=None,
+                 act_pre_layer=None,
+                 weight_quant_layer=None,
+                 act_quant_layer=None):
         super(QuantizedLinear, self).__init__()
         # For Linear
         self._act = getattr(layer, '_act')

@@ -437,26 +463,46 @@ class QuantizedLinear(layers.Layer):
         self.bias = getattr(layer, 'bias')
         # For FakeQuant
         self._linear_quant_axis = 1
-        self._fake_quant_weight = _get_fake_quant_type(
-            weight_quantize_type,
-            name=self.weight.name,
-            moving_rate=moving_rate,
-            quant_bits=weight_bits,
-            dtype=self._dtype,
-            quant_on_weight=True,
-            channel_num=self.weight.shape[self._linear_quant_axis],
-            quant_axis=self._linear_quant_axis)
-        self._fake_quant_input = _get_fake_quant_type(
-            activation_quantize_type,
-            name=layer.full_name(),
-            moving_rate=moving_rate,
-            quant_bits=activation_bits,
-            dtype=self._dtype,
-            quant_on_weight=False)
+        if weight_quant_layer is not None:
+            self._fake_quant_weight = weight_quant_layer()
+        else:
+            self._fake_quant_weight = _get_fake_quant_type(
+                weight_quantize_type,
+                name=self.weight.name,
+                moving_rate=moving_rate,
+                quant_bits=weight_bits,
+                dtype=self._dtype,
+                quant_on_weight=True,
+                channel_num=self.weight.shape[self._linear_quant_axis],
+                quant_axis=self._linear_quant_axis)
+        if act_quant_layer is not None:
+            self._fake_quant_input = act_quant_layer()
+        else:
+            self._fake_quant_input = _get_fake_quant_type(
+                activation_quantize_type,
+                name=layer.full_name(),
+                moving_rate=moving_rate,
+                quant_bits=activation_bits,
+                dtype=self._dtype,
+                quant_on_weight=False)
+
+        self._act_preprocess = act_pre_layer() if act_pre_layer is not None else None
+        self._weight_preprocess = weight_pre_layer() if weight_pre_layer is not None else None

     def forward(self, input):
+        if self._act_preprocess is not None:
+            input = self._act_preprocess(input)
         quant_input = self._fake_quant_input(input)
-        quant_weight = self._fake_quant_weight(self.weight)
+
+        weight = self.weight
+        if self._weight_preprocess is not None:
+            weight = self._weight_preprocess(self.weight)
+        quant_weight = self._fake_quant_weight(weight)

         if in_dygraph_mode():
             pre_bias = _varbase_creator(dtype=input.dtype)
             core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X',
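Note from the constructor changes above: the user classes are instantiated with no arguments (weight_quant_layer(), act_quant_layer(), act_pre_layer(), weight_pre_layer()), once per wrapped Conv2D/Linear, so any configuration or learnable state has to be created inside a no-argument __init__. A sketch of a stateful quantize layer under that constraint (illustrative; the class name, parameter, and 8-bit choice are assumptions, not Paddle API):

import paddle

class LearnableScaleFakeQuant(paddle.nn.Layer):
    def __init__(self):  # must be constructible with no arguments
        super(LearnableScaleFakeQuant, self).__init__()
        self.bits = 8
        self.scale = self.create_parameter(
            shape=[1],
            attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.Constant(value=1.0)),
            dtype='float32')

    def forward(self, x):
        qmax = float(2 ** (self.bits - 1) - 1)      # 127 for 8 bits
        q = paddle.clip(x / self.scale, -1.0, 1.0)  # normalize by the learned scale
        q = paddle.round(q * qmax) / qmax           # simulate the integer grid
        return q * self.scale                       # dequantize back to float

Since paddle.round has a zero gradient almost everywhere, a training-ready version would need a straight-through trick or a smooth surrogate such as the CustomQAT layer in the new test file below.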
python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py (new file, mode 100644)
# copyright (c) 2020 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
#     http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.

from __future__ import print_function

import os
import numpy as np
import random
import unittest
import logging
import paddle
import paddle.nn as nn
from paddle.optimizer import Adam
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.nn import Sequential
from paddle.fluid.dygraph import Conv2D
from paddle.nn import Pool2D
from paddle.fluid.dygraph import Linear
from paddle.fluid.log_helper import get_logger

os.environ["CPU_NUM"] = "1"

_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class PACT(nn.Layer):
    def __init__(self, init_value=20):
        super(PACT, self).__init__()
        alpha_attr = paddle.ParamAttr(
            name=self.full_name() + ".pact",
            initializer=paddle.nn.initializer.Constant(value=init_value))
        self.alpha = self.create_parameter(
            shape=[1], attr=alpha_attr, dtype='float32')

    def forward(self, x):
        out_left = paddle.nn.functional.relu(x - self.alpha)
        out_right = paddle.nn.functional.relu(-self.alpha - x)
        x = x - out_left + out_right
        return x
class CustomQAT(nn.Layer):
    def __init__(self):
        super(CustomQAT, self).__init__()
        attr = paddle.ParamAttr(
            initializer=paddle.nn.initializer.Constant(value=1.0))
        self.u_param = self.create_parameter(
            shape=[1], attr=attr, dtype='float32')
        self.l_param = self.create_parameter(
            shape=[1], attr=attr, dtype='float32')
        self.alpha_param = self.create_parameter(
            shape=[1], attr=attr, dtype='float32')
        self.upper = self.create_parameter(
            shape=[1], attr=attr, dtype='float32')
        self.upper.stop_gradient = True
        self.lower = self.create_parameter(
            shape=[1], attr=attr, dtype='float32')
        self.lower.stop_gradient = True

    def forward(self, x):
        def clip(x, upper, lower):
            x = x + paddle.nn.functional.relu(lower - x)
            x = x - paddle.nn.functional.relu(x - upper)
            return x

        def phi_function(x, mi, alpha, delta):
            s = 1 / (1 - alpha)
            k = paddle.log(2 / alpha - 1) * (1 / delta)
            x = (paddle.tanh((x - mi) * k)) * s
            return x

        def dequantize(x, lower_bound, delta, interval):
            x = ((x + 1) / 2 + interval) * delta + lower_bound
            return x

        bit = 8
        bit_range = 2**bit - 1
        paddle.assign(self.upper * 0.9 + self.u_param * 0.1, self.upper)
        paddle.assign(self.lower * 0.9 + self.l_param * 0.1, self.lower)
        x = clip(x, self.upper, self.lower)
        delta = (self.upper - self.lower) / bit_range
        interval = (x - self.lower) / delta
        mi = (interval + 0.5) * delta + self.l_param
        x = phi_function(x, mi, self.alpha_param, delta)
        x = dequantize(x, self.l_param, delta, interval)
        return x
class ImperativeLenet(paddle.nn.Layer):
    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(ImperativeLenet, self).__init__()
        self.features = Sequential(
            Conv2D(
                num_channels=1,
                num_filters=6,
                filter_size=3,
                stride=1,
                padding=1),
            Pool2D(
                pool_size=2, pool_type='max', pool_stride=2),
            Conv2D(
                num_channels=6,
                num_filters=16,
                filter_size=5,
                stride=1,
                padding=0),
            Pool2D(
                pool_size=2, pool_type='max', pool_stride=2))

        self.fc = Sequential(
            Linear(
                input_dim=400, output_dim=120),
            Linear(
                input_dim=120, output_dim=84),
            Linear(
                input_dim=84,
                output_dim=num_classes,
                act=classifier_activation))

    def forward(self, inputs):
        x = self.features(inputs)
        x = paddle.flatten(x, 1)
        x = self.fc(x)
        return x
class TestUserDefinedActPreprocess(unittest.TestCase):
    def setUp(self):
        _logger.info("test act_preprocess")
        self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT)

    def test_quant_aware_training(self):
        imperative_qat = self.imperative_qat
        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed
        lenet = ImperativeLenet()
        fixed_state = {}
        param_init_map = {}
        for name, param in lenet.named_parameters():
            p_shape = param.numpy().shape
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                value = np.random.normal(
                    loc=0.0, scale=0.01,
                    size=np.product(p_shape)).reshape(p_shape).astype('float32')
            fixed_state[name] = value
            param_init_map[param.name] = value
        lenet.set_dict(fixed_state)
        imperative_qat.quantize(lenet)
        adam = Adam(learning_rate=0.001, parameters=lenet.parameters())
        dynamic_loss_rec = []

        def train(model):
            adam = Adam(learning_rate=0.001, parameters=model.parameters())
            epoch_num = 1
            for epoch in range(epoch_num):
                model.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = paddle.to_tensor(x_data)
                    label = paddle.to_tensor(y_data)
                    out = model(img)
                    acc = paddle.metric.accuracy(out, label, k=1)
                    loss = nn.functional.loss.cross_entropy(out, label)
                    avg_loss = paddle.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    model.clear_gradients()
                    if batch_id % 50 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                            format(epoch, batch_id,
                                   avg_loss.numpy(), acc.numpy()))
                    break

        def test(model):
            model.eval()
            avg_acc = [[], []]
            for batch_id, data in enumerate(test_reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = paddle.to_tensor(x_data)
                label = paddle.to_tensor(y_data)

                out = model(img)
                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
                avg_acc[0].append(acc_top1.numpy())
                avg_acc[1].append(acc_top5.numpy())
                if batch_id % 100 == 0:
                    _logger.info("Test | step {}: acc1 = {:}, acc5 = {:}".format(
                        batch_id, acc_top1.numpy(), acc_top5.numpy()))

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=512, drop_last=True)
        test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=512)
        train(lenet)
        test(lenet)
class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess):
    def setUp(self):
        _logger.info("test weight_preprocess")
        self.imperative_qat = ImperativeQuantAware(weight_preprocess_layer=PACT)


class TestUserDefinedActQuantize(TestUserDefinedActPreprocess):
    def setUp(self):
        _logger.info("test act_quantize")
        self.imperative_qat = ImperativeQuantAware(act_quantize_layer=CustomQAT)


class TestUserDefinedWeightQuantize(TestUserDefinedActPreprocess):
    def setUp(self):
        _logger.info("test weight_quantize")
        self.imperative_qat = ImperativeQuantAware(weight_quantize_layer=CustomQAT)


if __name__ == '__main__':
    unittest.main()
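A note on the PACT layer in the test above: the two relu terms are an algebraic form of clipping, x - relu(x - alpha) + relu(-alpha - x) equals clip(x, -alpha, alpha) elementwise, written this way so the clipping threshold alpha stays a learnable parameter. A quick numpy check of that identity (illustrative only, not part of the commit):

import numpy as np

alpha = 20.0
x = np.array([-30.0, -5.0, 0.0, 7.5, 25.0])
pact = x - np.maximum(x - alpha, 0.0) + np.maximum(-alpha - x, 0.0)
assert np.allclose(pact, np.clip(x, -alpha, alpha))  # both give [-20., -5., 0., 7.5, 20.]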
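The CustomQAT layer appears to act as a differentiable "soft" uniform quantizer: inputs are clipped to a learned [lower, upper] range (tracked with a 0.9/0.1 moving average), mapped through the tanh-based phi_function, and then dequantized. The hard quantizer this smooths over would be plain 8-bit uniform quantization over that range; a numpy sketch of that hard round trip (an interpretation for intuition only, not code from the commit):

import numpy as np

def hard_fake_quant(x, lower, upper, bits=8):
    # Uniform quantization to 2**bits - 1 steps over [lower, upper], then dequantization.
    levels = 2 ** bits - 1
    delta = (upper - lower) / levels
    x = np.clip(x, lower, upper)
    q = np.round((x - lower) / delta)   # integer level index in [0, levels]
    return lower + q * delta            # back to a float on the quantization grid

print(hard_fake_quant(np.array([-1.2, -0.3, 0.0, 0.4, 0.9]), lower=-1.0, upper=1.0))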
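The input_dim=400 of the first Linear layer in ImperativeLenet follows from the feature shapes for a 1x28x28 MNIST image: the 3x3 conv with padding 1 keeps 28x28 (6 channels), the 2x2 max-pool halves it to 14x14, the 5x5 conv with no padding gives 10x10 (16 channels), and the second pool gives 5x5, so 16 * 5 * 5 = 400 flattened features. A quick check of the arithmetic (illustrative only):

def conv_out(size, kernel, stride=1, padding=0):
    # Standard convolution output-size formula.
    return (size + 2 * padding - kernel) // stride + 1

s = conv_out(28, 3, padding=1)  # 28
s = s // 2                      # max-pool 2x2 -> 14
s = conv_out(s, 5)              # 10
s = s // 2                      # max-pool 2x2 -> 5
print(16 * s * s)               # 400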