Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
ec88b6cc
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ec88b6cc
编写于
3月 19, 2019
作者:
Z
Zhen Wang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add channel wise quantization in ir pass.
上级
81b4fad8
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
290 addition
and
70 deletion
+290
-70
paddle/fluid/operators/fake_dequantize_op.h
paddle/fluid/operators/fake_dequantize_op.h
+33
-13
paddle/fluid/operators/fake_quantize_op.cc
paddle/fluid/operators/fake_quantize_op.cc
+4
-4
paddle/fluid/operators/fake_quantize_op.h
paddle/fluid/operators/fake_quantize_op.h
+4
-4
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
...ddle/fluid/contrib/slim/quantization/quantization_pass.py
+196
-27
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
...paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+20
-8
python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py
...n/paddle/fluid/tests/unittests/test_fake_dequantize_op.py
+32
-13
python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
+1
-1
未找到文件。
paddle/fluid/operators/fake_dequantize_op.h
浏览文件 @
ec88b6cc
...
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -54,10 +55,6 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel<T> {
auto
scales
=
ctx
.
MultiInput
<
framework
::
Tensor
>
(
"Scales"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
PADDLE_ENFORCE_EQ
(
scales
[
0
]
->
numel
(),
in
->
dims
()[
0
],
"The number of first scale values must be the same with "
"first dimension value of Input(X)."
);
auto
quant_bits
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"quant_bits"
);
int
max_range
=
std
::
pow
(
2
,
quant_bits
[
0
]
-
1
)
-
1
;
...
...
@@ -65,15 +62,38 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel<T> {
out
->
mutable_data
<
T
>
(
dev_ctx
.
GetPlace
());
auto
dequant
=
DequantizeFunctor
<
DeviceContext
,
T
>
();
for
(
int64_t
i
=
0
;
i
<
in
->
dims
()[
0
];
i
++
)
{
framework
::
Tensor
one_channel_in
=
in
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
one_channel_out
=
out
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
one_channel_scale
=
scales
[
0
]
->
Slice
(
i
,
i
+
1
);
dequant
(
dev_ctx
,
&
one_channel_in
,
&
one_channel_scale
,
static_cast
<
T
>
(
max_range
),
&
one_channel_out
);
}
if
(
scales
.
size
()
==
2
)
{
if
(
scales
.
size
()
==
1
)
{
PADDLE_ENFORCE_EQ
(
scales
[
0
]
->
numel
(),
in
->
dims
()[
0
],
"The number of first scale values must be the same with "
"first dimension value of Input(X) when the `Scales` has only one "
"element."
);
for
(
int64_t
i
=
0
;
i
<
in
->
dims
()[
0
];
i
++
)
{
framework
::
Tensor
one_channel_in
=
in
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
one_channel_out
=
out
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
one_channel_scale
=
scales
[
0
]
->
Slice
(
i
,
i
+
1
);
dequant
(
dev_ctx
,
&
one_channel_in
,
&
one_channel_scale
,
static_cast
<
T
>
(
max_range
),
&
one_channel_out
);
}
}
else
if
(
scales
.
size
()
==
2
)
{
PADDLE_ENFORCE_EQ
(
scales
[
0
]
->
numel
(),
in
->
dims
()[
1
],
"The number of first scale values must be the same with "
"second dimension value of Input(X) when the `Scales` has two "
"elements."
);
for
(
int64_t
i
=
0
;
i
<
in
->
dims
()[
0
];
i
++
)
{
framework
::
Tensor
one_batch_in
=
in
->
Slice
(
i
,
i
+
1
).
Resize
(
framework
::
slice_ddim
(
in
->
dims
(),
1
,
in
->
dims
().
size
()));
framework
::
Tensor
one_batch_out
=
out
->
Slice
(
i
,
i
+
1
).
Resize
(
framework
::
slice_ddim
(
out
->
dims
(),
1
,
out
->
dims
().
size
()));
for
(
int64_t
j
=
0
;
j
<
in
->
dims
()[
1
];
j
++
)
{
framework
::
Tensor
one_channel_in
=
one_batch_in
.
Slice
(
j
,
j
+
1
);
framework
::
Tensor
one_channel_out
=
one_batch_out
.
Slice
(
j
,
j
+
1
);
framework
::
Tensor
one_channel_scale
=
scales
[
0
]
->
Slice
(
j
,
j
+
1
);
dequant
(
dev_ctx
,
&
one_channel_in
,
&
one_channel_scale
,
static_cast
<
T
>
(
max_range
),
&
one_channel_out
);
}
}
PADDLE_ENFORCE_EQ
(
scales
[
1
]
->
numel
(),
1
,
"The second scale tensor should only have one value at now."
);
...
...
paddle/fluid/operators/fake_quantize_op.cc
浏览文件 @
ec88b6cc
...
...
@@ -169,10 +169,10 @@ class FakeChannelWiseQuantizeAbsMaxOp : public framework::OperatorWithKernel {
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of FakeChannelWiseQuantizeOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"OutScale
s
"
),
"Output(Scale
s
) of FakeChannelWiseQuantizeOp should not be null."
);
ctx
->
HasOutput
(
"OutScale"
),
"Output(Scale) of FakeChannelWiseQuantizeOp should not be null."
);
ctx
->
SetOutputDim
(
"Out"
,
ctx
->
GetInputDim
(
"X"
));
ctx
->
SetOutputDim
(
"OutScale
s
"
,
{
ctx
->
GetInputDim
(
"X"
)[
0
]});
ctx
->
SetOutputDim
(
"OutScale"
,
{
ctx
->
GetInputDim
(
"X"
)[
0
]});
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
...
...
@@ -192,7 +192,7 @@ class FakeChannelWiseQuantizeAbsMaxOpMaker
AddOutput
(
"Out"
,
"(Tensor) Output of quantized low level tensor, "
"but also saved as float data type."
);
AddOutput
(
"OutScale
s
"
,
"(Tensor) Current channel wise scale"
);
AddOutput
(
"OutScale"
,
"(Tensor) Current channel wise scale"
);
AddAttr
<
int
>
(
"bit_length"
,
"(int, default 8)"
)
.
SetDefault
(
8
)
.
AddCustomChecker
([](
const
int
&
bit_length
)
{
...
...
paddle/fluid/operators/fake_quantize_op.h
浏览文件 @
ec88b6cc
...
...
@@ -78,8 +78,8 @@ class FakeChannelWiseQuantizeAbsMaxKernel : public framework::OpKernel<T> {
auto
*
in
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
context
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
out_scale
s
=
context
.
Output
<
framework
::
Tensor
>
(
"OutScales
"
);
T
*
out_scale
s_data
=
out_scales
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
out_scale
=
context
.
Output
<
framework
::
Tensor
>
(
"OutScale
"
);
T
*
out_scale
_data
=
out_scale
->
mutable_data
<
T
>
(
context
.
GetPlace
());
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
int
bit_length
=
context
.
Attr
<
int
>
(
"bit_length"
);
...
...
@@ -91,13 +91,13 @@ class FakeChannelWiseQuantizeAbsMaxKernel : public framework::OpKernel<T> {
framework
::
Tensor
one_channel
=
in
->
Slice
(
i
,
i
+
1
);
const
T
*
one_channel_data
=
one_channel
.
data
<
T
>
();
find_abs_max
(
dev_ctx
,
one_channel_data
,
one_channel
.
numel
(),
&
out_scale
s
_data
[
i
]);
&
out_scale_data
[
i
]);
}
auto
clip_quant
=
ClipAndFakeQuantFunctor
<
DeviceContext
,
T
>
();
for
(
int64_t
i
=
0
;
i
<
in
->
dims
()[
0
];
i
++
)
{
framework
::
Tensor
one_channel_in
=
in
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
one_channel_out
=
out
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
one_channel_scale
=
out_scale
s
->
Slice
(
i
,
i
+
1
);
framework
::
Tensor
one_channel_scale
=
out_scale
->
Slice
(
i
,
i
+
1
);
clip_quant
(
dev_ctx
,
one_channel_in
,
one_channel_scale
,
bin_cnt
,
&
one_channel_out
);
}
...
...
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
浏览文件 @
ec88b6cc
...
...
@@ -22,6 +22,7 @@ from ....framework import IrGraph
from
....framework
import
IrNode
from
....framework
import
Program
from
....initializer
import
Constant
from
....initializer
import
NumpyArrayInitializer
from
....
import
unique_name
__all__
=
[
...
...
@@ -54,14 +55,15 @@ class QuantizationTransformPass(object):
the bias is not quantized.
activation_bits (int): quantization bit number for activation.
activation_quantize_type (str): quantization type for activation,
now support 'abs_max', 'range_abs_max'
. If use 'abs_max' mode,
the quantization scale will be calculated dynamically each step
in both training and testing period. If use 'range_abs_max',
a static quantization scale will be calculated during training
and used in inference.
now support 'abs_max', 'range_abs_max'
and 'moving_average_abs_max'.
If use 'abs_max' mode, the quantization scale will be calculated
dynamically each step in both training and testing period. If use
'range_abs_max', a static quantization scale will be calculated
during training
and used in inference.
weight_quantize_type (str): quantization type for weights,
support 'abs_max'. The 'range_abs_max' usually is not used for
weight, since weights are fixed once the model is well trained.
support 'abs_max' and 'channel_wise_abs_max'. The 'range_abs_max'
usually is not used for weight, since weights are fixed once the
model is well trained.
window_size (int): the window size for 'range_abs_max' quantization.
Examples:
...
...
@@ -84,7 +86,11 @@ class QuantizationTransformPass(object):
self
.
_weight_bits
=
weight_bits
self
.
_activation_bits
=
activation_bits
quant_type
=
[
'abs_max'
,
'range_abs_max'
,
'moving_average_abs_max'
]
quant_type
=
[
'abs_max'
,
'channel_wise_abs_max'
,
'range_abs_max'
,
'moving_average_abs_max'
]
assert
activation_quantize_type
!=
'channel_wise_abs_max'
,
"The activation quantization type does not support 'channel_wise_abs_max'."
if
activation_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown activation_quantize_type : '%s'. It can only be "
,
...
...
@@ -93,7 +99,7 @@ class QuantizationTransformPass(object):
if
weight_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown weight_quantize_type: '%s'. It can only be "
,
"'abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
,
"'abs_max' or '
channel_wise_abs_max' or '
range_abs_max' or 'moving_average_abs_max'."
,
str
(
weight_quantize_type
))
self
.
_activation_quantize_type
=
activation_quantize_type
...
...
@@ -103,6 +109,7 @@ class QuantizationTransformPass(object):
self
.
_need_initialized
=
collections
.
OrderedDict
()
self
.
_quantizable_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'mul'
]
self
.
_conv_ops
=
[
'conv2d'
,
'depthwise_conv2d'
]
self
.
_quantizable_grad_ops
=
[
'%s_grad'
%
(
op
)
for
op
in
self
.
_quantizable_ops
]
...
...
@@ -135,10 +142,26 @@ class QuantizationTransformPass(object):
else
self
.
_activation_bits
quant_type
=
self
.
_weight_quantize_type
if
var_node
.
name
()
\
in
persistable_vars
else
self
.
_activation_quantize_type
quant_var_node
,
scale_var_node
=
self
.
_insert_quant_op
(
graph
,
var_node
,
quant_bits
,
quant_type
)
dequant_var_node
=
self
.
_insert_dequant_op
(
graph
,
quant_var_node
,
scale_var_node
,
quant_bits
)
if
quant_type
==
'channel_wise_abs_max'
:
assert
var_node
.
name
(
)
in
persistable_vars
,
"'channel_wise_abs_max' can only be applied on weights."
if
op
.
name
()
in
self
.
_conv_ops
:
quant_var_node
,
scale_var_node
=
self
.
_insert_channel_quant_op
(
graph
,
var_node
,
quant_bits
)
dequant_var_node
=
self
.
_insert_channel_dequant_op
(
graph
,
quant_var_node
,
[
scale_var_node
],
[
quant_bits
])
else
:
quant_var_node
,
scale_var_node
=
self
.
_insert_quant_op
(
graph
,
var_node
,
quant_bits
,
'abs_max'
)
dequant_var_node
=
self
.
_insert_dequant_op
(
graph
,
quant_var_node
,
scale_var_node
,
quant_bits
)
else
:
quant_var_node
,
scale_var_node
=
self
.
_insert_quant_op
(
graph
,
var_node
,
quant_bits
,
quant_type
)
dequant_var_node
=
self
.
_insert_dequant_op
(
graph
,
quant_var_node
,
scale_var_node
,
quant_bits
)
dequantized_vars
[
var_node
.
name
()]
=
dequant_var_node
graph
.
update_input_link
(
var_node
,
dequant_var_node
,
op
)
...
...
@@ -244,7 +267,7 @@ class QuantizationTransformPass(object):
scale_var_node
=
graph
.
create_var_node
(
name
=
self
.
_quantized_scale_name
(
var_node
.
name
()),
var_type
=
var_node
.
type
(),
shape
=
var_node
.
shape
()
,
shape
=
[
1
]
,
var_dtype
=
var_node
.
dtype
())
quant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_quantize_abs_max'
,
...
...
@@ -384,6 +407,36 @@ class QuantizationTransformPass(object):
return
quant_var_node
,
scale_out_node
def
_insert_channel_quant_op
(
self
,
graph
,
var_node
,
quant_bits
):
"""
Insert fake_channel_wise_quantize_abs_max op in the graph.
"""
assert
var_node
.
is_var
(),
'{} is not a var'
.
format
(
var_node
.
name
())
quant_var_node
=
graph
.
create_var_node
(
name
=
self
.
_quantized_var_name
(
var_node
.
name
()),
var_type
=
var_node
.
type
(),
shape
=
var_node
.
shape
(),
var_dtype
=
var_node
.
dtype
())
scale_var_node
=
graph
.
create_var_node
(
name
=
self
.
_quantized_scale_name
(
var_node
.
name
()),
var_type
=
var_node
.
type
(),
shape
=
[
var_node
.
shape
()[
0
]],
var_dtype
=
var_node
.
dtype
())
quant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_channel_wise_quantize_abs_max'
,
attrs
=
{
'bit_length'
:
quant_bits
,
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
var_node
},
outputs
=
{
'Out'
:
quant_var_node
,
'OutScale'
:
scale_var_node
})
graph
.
link_to
(
var_node
,
quant_op_node
)
graph
.
link_to
(
quant_op_node
,
quant_var_node
)
graph
.
link_to
(
quant_op_node
,
scale_var_node
)
return
quant_var_node
,
scale_var_node
def
_insert_dequant_op
(
self
,
graph
,
var_node
,
scale_var_node
,
quant_bits
):
"""
Insert fake_dequantize_op in the graph.
...
...
@@ -410,6 +463,33 @@ class QuantizationTransformPass(object):
graph
.
link_to
(
dequant_op_node
,
dequant_var_node
)
return
dequant_var_node
def
_insert_channel_dequant_op
(
self
,
graph
,
var_node
,
scale_var_nodes
,
quant_bits
):
"""
Insert fake_channel_wise_dequantize_max_abs in the graph.
"""
assert
var_node
.
is_var
(),
'{} is not a var'
.
format
(
var_node
.
name
())
dequant_var_node
=
graph
.
create_var_node
(
name
=
self
.
_dequantized_var_name
(
var_node
.
name
()),
var_type
=
var_node
.
type
(),
shape
=
var_node
.
shape
(),
var_dtype
=
var_node
.
dtype
())
dequant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_channel_wise_dequantize_max_abs'
,
attrs
=
{
'quant_bits'
:
quant_bits
,
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
var_node
,
'Scales'
:
scale_var_nodes
},
outputs
=
{
'Out'
:
dequant_var_node
})
graph
.
link_to
(
var_node
,
dequant_op_node
)
for
scale_n
in
scale_var_nodes
:
graph
.
link_to
(
scale_n
,
dequant_op_node
)
graph
.
link_to
(
dequant_op_node
,
dequant_var_node
)
return
dequant_var_node
def
_quantized_var_name
(
self
,
var_name
):
"""
Return quantized variable name for the input `var_name`.
...
...
@@ -442,7 +522,7 @@ class QuantizationFreezePass(object):
place(fluid.CPUPlace|fluid.CUDAPlace): place is used to restore the weight tensors.
weight_bits (int): quantization bit number for weights.
activation_bits (int): quantization bit number for activation.
weight_quantize_type (str): quantization type for weights, support 'abs_max'.
weight_quantize_type (str): quantization type for weights, support 'abs_max'
and 'channel_wise_abs_max'
.
The 'range_abs_max' usually is not used for weight, since weights are fixed once the
model is well trained.
"""
...
...
@@ -463,11 +543,15 @@ class QuantizationFreezePass(object):
self
.
_activation_bits
=
activation_bits
self
.
_weight_quantize_type
=
weight_quantize_type
self
.
_quantizable_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'mul'
]
self
.
_conv_ops
=
[
'conv2d'
,
'depthwise_conv2d'
]
self
.
_fake_quant_op_names
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
,
'fake_quantize_moving_average_abs_max'
'fake_quantize_moving_average_abs_max'
,
'fake_channel_wise_quantize_abs_max'
]
self
.
_fake_dequant_op_names
=
[
'fake_dequantize_max_abs'
,
'fake_channel_wise_dequantize_max_abs'
]
self
.
_fake_dequant_op_names
=
[
'fake_dequantize_max_abs'
]
self
.
_op_input_rename_map
=
collections
.
OrderedDict
()
self
.
_op_output_rename_map
=
collections
.
OrderedDict
()
self
.
_var_scale_map
=
collections
.
OrderedDict
()
...
...
@@ -489,20 +573,29 @@ class QuantizationFreezePass(object):
if
self
.
_weight_quantize_type
==
'abs_max'
:
param
=
self
.
_load_var
(
input_arg_name
)
scale_v
=
np
.
max
(
np
.
abs
(
param
))
elif
self
.
_weight_quantize_type
==
'channel_wise_abs_max'
:
param
=
self
.
_load_var
(
input_arg_name
)
if
len
(
param
.
shape
)
==
4
:
# conv2d or depthwise_conv2d
print
(
'DEBUG**************************: %s'
%
input_arg_name
)
scale_v
=
[]
for
i
in
range
(
param
.
shape
[
0
]):
scale_v
.
append
(
np
.
max
(
np
.
abs
(
param
[
i
])))
else
:
scale_v
=
np
.
max
(
np
.
abs
(
param
))
else
:
scale_v
=
self
.
_load_var
(
op_node
.
output
(
'OutScale'
)[
0
])[
0
]
self
.
_var_scale_map
[
input_arg_name
]
=
scale_v
else
:
scale_v
=
graph
.
var_node
(
op_node
.
output
(
'OutScale'
)[
0
])
self
.
_var_scale_map
[
input_arg_name
]
=
scale_v
if
input_arg_name
in
persistable_vars
:
self
.
_remove_fake_quant_and_dequant_op
(
graph
,
op_node
)
# quantize weight and restore
param_v
=
self
.
_load_var
(
input_arg_name
)
quantized_param_v
=
self
.
_quant
(
param_v
,
scale_v
,
self
.
_weight_bits
)
self
.
_restore_var
(
input_arg_name
,
quantized_param_v
)
else
:
scale_v
=
graph
.
var_node
(
op_node
.
output
(
'OutScale'
)[
0
])
self
.
_var_scale_map
[
input_arg_name
]
=
scale_v
ops
=
graph
.
all_op_nodes
()
for
op_node
in
ops
:
...
...
@@ -514,7 +607,10 @@ class QuantizationFreezePass(object):
for
op_node
in
ops
:
op_name
=
op_node
.
name
()
if
op_name
in
self
.
_quantizable_ops
:
self
.
_insert_post_dequant_op
(
graph
,
op_node
)
if
self
.
_weight_quantize_type
==
'channel_wise_abs_max'
and
op_name
in
self
.
_conv_ops
:
self
.
_insert_post_channel_dequant_op
(
graph
,
op_node
)
else
:
self
.
_insert_post_dequant_op
(
graph
,
op_node
)
for
op_node
in
ops
:
# insert dequant_op after fc/conv, need to rename inputs of the followed ops
...
...
@@ -538,9 +634,73 @@ class QuantizationFreezePass(object):
self
.
_op_input_rename_map
[
k
]
=
self
.
_op_input_rename_map
[
v
]
graph
.
safe_remove_nodes
(
op_node
)
def
_insert_post_channel_dequant_op
(
self
,
graph
,
op_node
):
persistable_vars
=
[
p
.
name
()
for
p
in
graph
.
all_persistable_nodes
()]
for
var_node
in
op_node
.
inputs
:
name
=
var_node
.
name
()
if
name
in
self
.
_op_input_rename_map
:
old_in
=
graph
.
var_node
(
name
)
new_in
=
graph
.
var_node
(
self
.
_op_input_rename_map
[
name
])
new_in
.
clear_outputs
()
graph
.
update_input_link
(
old_in
,
new_in
,
op_node
)
original_var_name
=
self
.
_original_var_name
(
name
)
scale_v
=
self
.
_var_scale_map
[
original_var_name
]
if
original_var_name
in
persistable_vars
:
assert
isinstance
(
scale_v
,
list
),
'The scale of parameter %s is not a list.'
%
(
original_var_name
)
channel_scale
=
np
.
array
(
scale_v
)
else
:
assert
isinstance
(
scale_v
,
IrNode
)
scale_var_node
=
self
.
_var_scale_map
[
original_var_name
]
if
len
(
op_node
.
outputs
)
!=
1
:
raise
ValueError
(
"Only support one output, but op %s has"
" more than one output."
%
(
op_node
.
name
()))
output_var_node
=
op_node
.
outputs
[
0
]
weight_scale_node
=
graph
.
create_persistable_node
(
name
=
unique_name
.
generate
(
'channel_scale'
),
var_type
=
core
.
VarDesc
.
VarType
.
LOD_TENSOR
,
shape
=
[
channel_scale
.
shape
[
0
]],
var_dtype
=
output_var_node
.
dtype
())
init_program
=
Program
()
weight_scale_var
=
init_program
.
global_block
().
create_var
(
name
=
weight_scale_node
.
name
(),
shape
=
weight_scale_node
.
shape
(),
dtype
=
weight_scale_node
.
dtype
(),
type
=
weight_scale_node
.
type
(),
lod_level
=
weight_scale_node
.
var
().
lod_level
(),
persistable
=
weight_scale_node
.
persistable
())
initializer
=
NumpyArrayInitializer
(
value
=
channel_scale
)
initializer
(
weight_scale_var
,
init_program
.
global_block
())
exe
=
Executor
(
self
.
_place
)
exe
.
run
(
program
=
init_program
,
scope
=
self
.
_scope
)
dequant_var_node
=
graph
.
create_var_node
(
name
=
self
.
_dequantized_var_name
(
output_var_node
.
name
()),
var_type
=
output_var_node
.
type
(),
shape
=
output_var_node
.
shape
(),
var_dtype
=
output_var_node
.
dtype
())
dequant_op_node
=
graph
.
create_op_node
(
op_type
=
'fake_channel_wise_dequantize_max_abs'
,
attrs
=
{
'quant_bits'
:
[
self
.
_weight_bits
,
self
.
_activation_bits
],
'op_role'
:
core
.
op_proto_and_checker_maker
.
OpRole
.
Forward
},
inputs
=
{
'X'
:
output_var_node
,
'Scales'
:
[
weight_scale_node
,
scale_var_node
]
},
outputs
=
{
'Out'
:
dequant_var_node
})
graph
.
link_to
(
output_var_node
,
dequant_op_node
)
graph
.
link_to
(
scale_var_node
,
dequant_op_node
)
graph
.
link_to
(
weight_scale_node
,
dequant_op_node
)
graph
.
link_to
(
dequant_op_node
,
dequant_var_node
)
self
.
_op_output_rename_map
[
output_var_node
.
name
()]
=
dequant_var_node
return
dequant_var_node
def
_insert_post_dequant_op
(
self
,
graph
,
op_node
):
max_range
=
None
scale_var_node
=
None
persistable_vars
=
[
p
.
name
()
for
p
in
graph
.
all_persistable_nodes
()]
for
var_node
in
op_node
.
inputs
:
name
=
var_node
.
name
()
...
...
@@ -637,7 +797,12 @@ class QuantizationFreezePass(object):
or
isinstance
(
v
,
np
.
float64
)
def
_quant
(
self
,
x
,
scale
,
num_bits
):
return
np
.
round
(
x
/
scale
*
((
1
<<
(
num_bits
-
1
))
-
1
))
if
isinstance
(
scale
,
list
):
for
i
,
s
in
enumerate
(
scale
):
x
[
i
]
=
np
.
round
(
x
[
i
]
/
s
*
((
1
<<
(
num_bits
-
1
))
-
1
))
return
x
else
:
return
np
.
round
(
x
/
scale
*
((
1
<<
(
num_bits
-
1
))
-
1
))
class
ConvertToInt8Pass
(
object
):
...
...
@@ -731,9 +896,13 @@ class TransformForMobilePass(object):
def
__init__
(
self
):
self
.
_fake_quant_op_names
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
,
'fake_quantize_moving_average_abs_max'
,
'fake_channel_wise_quantize_abs_max'
]
self
.
_fake_dequant_op_names
=
[
'fake_dequantize_max_abs'
,
'fake_channel_wise_dequantize_max_abs'
]
self
.
_fake_dequant_op_names
=
[
'fake_dequantize_max_abs'
]
def
apply
(
self
,
graph
):
"""
...
...
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
浏览文件 @
ec88b6cc
...
...
@@ -243,7 +243,12 @@ class TestQuantizationFreezePass(unittest.TestCase):
with
fluid
.
scope_guard
(
scope
):
exe
.
run
(
startup
)
transform_pass
=
QuantizationTransformPass
(
scope
=
scope
,
place
=
place
,
activation_quantize_type
=
quant_type
)
scope
=
scope
,
place
=
place
,
activation_quantize_type
=
quant_type
,
weight_quantize_type
=
'channel_wise_abs_max'
)
#transform_pass = QuantizationTransformPass(
# scope=scope, place=place, activation_quantize_type=quant_type)
transform_pass
.
apply
(
main_graph
)
transform_pass
.
apply
(
test_graph
)
dev_name
=
'_gpu_'
if
use_cuda
else
'_cpu_'
...
...
@@ -296,7 +301,11 @@ class TestQuantizationFreezePass(unittest.TestCase):
fetch_list
=
[
loss
,
w_var
])
# Freeze graph for inference, but the weight of fc/conv is still float type.
freeze_pass
=
QuantizationFreezePass
(
scope
=
scope
,
place
=
place
)
freeze_pass
=
QuantizationFreezePass
(
scope
=
scope
,
place
=
place
,
weight_quantize_type
=
'channel_wise_abs_max'
)
#freeze_pass = QuantizationFreezePass(scope=scope, place=place)
freeze_pass
.
apply
(
test_graph
)
if
not
for_ci
:
marked_nodes
=
set
()
...
...
@@ -375,29 +384,32 @@ class TestQuantizationFreezePass(unittest.TestCase):
if
fluid
.
core
.
is_compiled_with_cuda
():
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
True
,
seed
=
1
,
quant_type
=
'abs_max'
,
for_ci
=
Tru
e
)
True
,
seed
=
1
,
quant_type
=
'abs_max'
,
for_ci
=
Fals
e
)
def
test_freeze_graph_cpu_dynamic
(
self
):
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
False
,
seed
=
2
,
quant_type
=
'abs_max'
,
for_ci
=
Tru
e
)
self
.
freeze_graph
(
False
,
seed
=
2
,
quant_type
=
'abs_max'
,
for_ci
=
Fals
e
)
def
test_freeze_graph_cuda_static
(
self
):
if
fluid
.
core
.
is_compiled_with_cuda
():
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
True
,
seed
=
1
,
quant_type
=
'range_abs_max'
,
for_ci
=
Tru
e
)
True
,
seed
=
1
,
quant_type
=
'range_abs_max'
,
for_ci
=
Fals
e
)
self
.
freeze_graph
(
True
,
seed
=
1
,
quant_type
=
'moving_average_abs_max'
,
for_ci
=
Tru
e
)
for_ci
=
Fals
e
)
def
test_freeze_graph_cpu_static
(
self
):
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
False
,
seed
=
2
,
quant_type
=
'range_abs_max'
,
for_ci
=
Tru
e
)
False
,
seed
=
2
,
quant_type
=
'range_abs_max'
,
for_ci
=
Fals
e
)
self
.
freeze_graph
(
False
,
seed
=
2
,
quant_type
=
'moving_average_abs_max'
,
for_ci
=
True
)
False
,
seed
=
2
,
quant_type
=
'moving_average_abs_max'
,
for_ci
=
False
)
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py
浏览文件 @
ec88b6cc
...
...
@@ -31,15 +31,27 @@ def dequantize_max_abs(x, scale, max_range):
return
y
def
channel_wise_quantize_max_abs
(
x
,
quant_bit
=
8
):
def
channel_wise_quantize_max_abs
(
x
,
quant_bit
=
8
,
use_second_dim
=
False
):
scales
=
[]
for
i
in
range
(
x
.
shape
[
0
]):
scales
.
append
(
np
.
max
(
np
.
abs
(
x
[
i
])).
astype
(
"float32"
))
y
=
x
.
copy
()
max_range
=
math
.
pow
(
2
,
quant_bit
-
1
)
-
1
for
i
,
scale
in
enumerate
(
scales
):
y
[
i
]
=
np
.
round
(
y
[
i
]
/
scale
*
max_range
)
if
not
use_second_dim
:
for
i
in
range
(
x
.
shape
[
0
]):
scales
.
append
(
np
.
max
(
np
.
abs
(
x
[
i
])).
astype
(
"float32"
))
y
=
x
.
copy
()
max_range
=
math
.
pow
(
2
,
quant_bit
-
1
)
-
1
for
i
,
scale
in
enumerate
(
scales
):
y
[
i
]
=
np
.
round
(
x
[
i
]
/
scale
*
max_range
)
else
:
for
i
in
range
(
x
.
shape
[
0
]):
s
=
[]
for
j
in
range
(
x
.
shape
[
1
]):
s
.
append
(
np
.
max
(
np
.
abs
(
x
[
i
][
j
])).
astype
(
"float32"
))
scales
.
append
(
s
)
scales
=
np
.
amax
(
np
.
array
(
scales
),
axis
=
0
)
y
=
x
.
copy
()
max_range
=
math
.
pow
(
2
,
quant_bit
-
1
)
-
1
for
i
in
range
(
x
.
shape
[
0
]):
for
j
,
scale
in
enumerate
(
scales
):
y
[
i
][
j
]
=
np
.
round
(
x
[
i
][
j
]
/
scale
*
max_range
)
return
y
,
scales
...
...
@@ -47,10 +59,16 @@ def channel_wise_dequantize_max_abs(x,
scales
,
quant_bits
,
activation_scale
=
None
):
y
=
x
.
copy
()
for
i
in
range
(
x
.
shape
[
0
]):
y
[
i
]
=
(
scales
[
i
]
/
(
math
.
pow
(
2
,
quant_bits
[
0
]
-
1
)
-
1
))
*
y
[
i
]
if
activation_scale
is
not
None
:
if
activation_scale
is
None
:
y
=
x
.
copy
()
for
i
in
range
(
x
.
shape
[
0
]):
y
[
i
]
=
(
scales
[
i
]
/
(
math
.
pow
(
2
,
quant_bits
[
0
]
-
1
)
-
1
))
*
x
[
i
]
else
:
y
=
x
.
copy
()
for
i
in
range
(
x
.
shape
[
0
]):
for
j
in
range
(
x
.
shape
[
1
]):
y
[
i
][
j
]
=
(
scales
[
j
]
/
(
math
.
pow
(
2
,
quant_bits
[
0
]
-
1
)
-
1
))
*
x
[
i
][
j
]
y
*=
activation_scale
/
(
math
.
pow
(
2
,
quant_bits
[
1
]
-
1
)
-
1
)
return
y
...
...
@@ -65,7 +83,8 @@ class TestFakeChannelWiseDequantizeMaxAbsOpTwoScales(OpTest):
self
.
set_args
()
self
.
op_type
=
"fake_channel_wise_dequantize_max_abs"
x
=
np
.
random
.
randn
(
4
,
3
,
64
,
64
).
astype
(
self
.
data_type
)
yq
,
scales
=
channel_wise_quantize_max_abs
(
x
,
self
.
quant_bits
[
0
])
yq
,
scales
=
channel_wise_quantize_max_abs
(
x
,
self
.
quant_bits
[
0
],
use_second_dim
=
True
)
ydq
=
channel_wise_dequantize_max_abs
(
yq
,
scales
,
self
.
quant_bits
,
self
.
activation_scale
)
...
...
python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
浏览文件 @
ec88b6cc
...
...
@@ -53,7 +53,7 @@ class TestFakeChannelWiseQuantizeOp(OpTest):
self
.
outputs
=
{
'Out'
:
outputs
,
'OutScale
s
'
:
np
.
array
(
scales
).
astype
(
"float32"
),
'OutScale'
:
np
.
array
(
scales
).
astype
(
"float32"
),
}
def
test_check_output
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录