Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
81b4fad8
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
81b4fad8
编写于
3月 15, 2019
作者:
视言
提交者:
qingqing01
3月 15, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add moving average absmax op and fix bug (#15155)
* Add moving average absmax op in quantization-aware training.
上级
92b9ce34
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
409 addition
and
17 deletion
+409
-17
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-1
paddle/fluid/operators/fake_quantize_op.cc
paddle/fluid/operators/fake_quantize_op.cc
+102
-0
paddle/fluid/operators/fake_quantize_op.cu
paddle/fluid/operators/fake_quantize_op.cu
+38
-0
paddle/fluid/operators/fake_quantize_op.h
paddle/fluid/operators/fake_quantize_op.h
+58
-1
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
+74
-10
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
...ddle/fluid/contrib/slim/quantization/quantization_pass.py
+81
-5
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
...paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+13
-0
python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
+42
-0
未找到文件。
paddle/fluid/API.spec
浏览文件 @
81b4fad8
...
...
@@ -367,7 +367,7 @@ paddle.fluid.contrib.BeamSearchDecoder.read_array (ArgSpec(args=['self', 'init',
paddle.fluid.contrib.BeamSearchDecoder.update_array (ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None), ('document', '5754e9b3212b7c09497151516a0de5a7'))
paddle.fluid.contrib.memory_usage (ArgSpec(args=['program', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8fcb2f93bb743693baa8d4860a5ccc47'))
paddle.fluid.contrib.op_freq_statistic (ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None), ('document', '4d43687113c4bf5b29d15aee2f4e4afa'))
paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'
], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000
)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
paddle.fluid.contrib.QuantizeTranspiler.__init__ (ArgSpec(args=['self', 'weight_bits', 'activation_bits', 'activation_quantize_type', 'weight_quantize_type', 'window_size'
, 'moving_rate'], varargs=None, keywords=None, defaults=(8, 8, 'abs_max', 'abs_max', 10000, 0.9
)), ('document', '14b39f1fcd5667ff556b1aad94357d1d'))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 (ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program (ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)), ('document', '909675a1ab055c69b436a7893fcae4fd'))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile (ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '6dd9909f10b283ba2892a99058a72884'))
...
...
paddle/fluid/operators/fake_quantize_op.cc
浏览文件 @
81b4fad8
...
...
@@ -81,6 +81,30 @@ struct FindRangeAbsMaxFunctor<platform::CPUDeviceContext, T> {
template
struct
FindRangeAbsMaxFunctor
<
platform
::
CPUDeviceContext
,
float
>;
// CPU specialization of the moving-average abs-max update.
//
// Using element 0 of every buffer, it computes
//   state' = rate * state + 1
//   accum' = rate * accum + cur_scale[0]
//   scale' = accum' / state'
// and writes the three results to out_state, out_accum and out_scale.
template <typename T>
struct FindMovingAverageAbsMaxFunctor<platform::CPUDeviceContext, T> {
  void operator()(const platform::CPUDeviceContext& ctx,
                  const framework::Tensor& in_accum,
                  const framework::Tensor& in_state, const T* cur_scale,
                  const float rate, framework::Tensor* out_state,
                  framework::Tensor* out_accum, framework::Tensor* out_scale) {
    // Read all previous values before any output buffer is written.
    const T prev_accum = in_accum.data<T>()[0];
    const T prev_state = in_state.data<T>()[0];
    const T sample = cur_scale[0];

    const T next_state = rate * prev_state + 1;
    const T next_accum = rate * prev_accum + sample;
    const T next_scale = next_accum / next_state;

    // Outputs are materialized on the context's place, one element each.
    T* state_dst = out_state->mutable_data<T>(ctx.GetPlace());
    state_dst[0] = next_state;
    T* accum_dst = out_accum->mutable_data<T>(ctx.GetPlace());
    accum_dst[0] = next_accum;
    T* scale_dst = out_scale->mutable_data<T>(ctx.GetPlace());
    scale_dst[0] = next_scale;
  }
};
template
struct
FindMovingAverageAbsMaxFunctor
<
platform
::
CPUDeviceContext
,
float
>;
class
FakeQuantizeAbsMaxOp
:
public
framework
::
OperatorWithKernel
{
public:
FakeQuantizeAbsMaxOp
(
const
std
::
string
&
type
,
...
...
@@ -255,6 +279,78 @@ $$Out = round(X/scale * range)$$
}
};
// Operator class for the moving-average abs-max fake quantization op.
// It checks that the mandatory slots are present and infers output shapes.
class FakeQuantizeMovingAverageAbsMaxOp : public framework::OperatorWithKernel {
 public:
  FakeQuantizeMovingAverageAbsMaxOp(const std::string& type,
                                    const framework::VariableNameMap& inputs,
                                    const framework::VariableNameMap& outputs,
                                    const framework::AttributeMap& attrs)
      : OperatorWithKernel(type, inputs, outputs, attrs) {}

  // Requires X, Out and OutScale. OutState / OutAccum are shaped only when
  // they are bound. Out mirrors X (dims and LoD); the scalar buffers are {1}.
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(
        ctx->HasInput("X"),
        "Input(X) of FakeQuantizeMovingAverageAbsMaxOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasOutput("Out"),
        "Output(Out) of FakeQuantizeMovingAverageAbsMaxOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("OutScale"),
                   "Output(OutScale) of FakeQuantizeMovingAverageAbsMaxOp "
                   "should not be null");

    // Optional running-statistics outputs.
    if (ctx->HasOutput("OutState")) {
      ctx->SetOutputDim("OutState", {1});
    }
    if (ctx->HasOutput("OutAccum")) {
      ctx->SetOutputDim("OutAccum", {1});
    }

    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
    ctx->SetOutputDim("OutScale", {1});
    ctx->ShareLoD("X", /*->*/ "Out");
  }

 protected:
  // The kernel is selected from the data type of X and the current device.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    const auto* x = ctx.Input<framework::LoDTensor>("X");
    return framework::OpKernelType(x->type(), ctx.device_context());
  }
};
// Declares the inputs, outputs, attributes and user-facing documentation of
// the moving-average abs-max fake quantization op.
class FakeQuantizeMovingAverageAbsMaxOpMaker
    : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "(Tensor) Input is float data type.");
    AddInput("InScale", "Last scale.");
    // The running statistics are dispensable in the op proto.
    AddInput("InAccum", "Last accum.").AsDispensable();
    AddInput("InState", "Last state.").AsDispensable();
    AddOutput("Out", "(Tensor) Output of quantized low level tensor.");
    AddOutput("OutScale", " Current scale");
    AddOutput("OutState", "(Tensor) state buffer.").AsDispensable();
    AddOutput("OutAccum", "(Tensor) accum buffer.").AsDispensable();
    AddAttr<float>("moving_rate", "(float, default 0.9) moving rate.")
        .SetDefault(0.9);
    AddAttr<int>("bit_length", "(int, default 8), quantization bit number.")
        .SetDefault(8)
        .AddCustomChecker([](const int& bit_length) {
          PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
                         "'bit_length' should be between 1 and 16.");
        });
    AddAttr<bool>("is_test",
                  "(bool, default false) Set to true for inference only, false "
                  "for training. Some layers may run faster when this is true.")
        .SetDefault(false);
    // The scale formula below mirrors the functor implementation:
    //   accum = moving_rate * accum + max(abs(x))
    //   state = moving_rate * state + 1
    //   scale = accum / state
    // The previous text multiplied max(abs(x)) by a hard-coded 0.9 instead of
    // applying the moving rate to accum.
    AddComment(R"DOC(
FakeQuantize operator is used in static quantization.

$$scale = (moving_rate * accum + max(abs(x))) / (moving_rate * state + 1)$$
$$range = 2^{bit_length - 1} - 1$$
$$Out = round(X/scale * range)$$

)DOC");
  }
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -273,6 +369,12 @@ REGISTER_OPERATOR(fake_quantize_range_abs_max, ops::FakeQuantizeRangeAbsMaxOp,
REGISTER_OP_CPU_KERNEL
(
fake_quantize_range_abs_max
,
ops
::
FakeQuantizeRangeAbsMaxKernel
<
CPU
,
float
>
);
REGISTER_OPERATOR
(
fake_quantize_moving_average_abs_max
,
ops
::
FakeQuantizeMovingAverageAbsMaxOp
,
ops
::
FakeQuantizeMovingAverageAbsMaxOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fake_quantize_moving_average_abs_max
,
ops
::
FakeQuantizeMovingAverageAbsMaxKernel
<
CPU
,
float
>
);
REGISTER_OPERATOR
(
fake_channel_wise_quantize_abs_max
,
ops
::
FakeChannelWiseQuantizeAbsMaxOp
,
ops
::
FakeChannelWiseQuantizeAbsMaxOpMaker
,
...
...
paddle/fluid/operators/fake_quantize_op.cu
浏览文件 @
81b4fad8
...
...
@@ -147,6 +147,41 @@ struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, T> {
template
struct
FindRangeAbsMaxFunctor
<
platform
::
CUDADeviceContext
,
float
>;
// CUDA specialization of the moving-average abs-max update.
//
// The three scalar inputs are copied to host variables, the update
//   state' = rate * state + 1
//   accum' = rate * accum + scale
//   scale' = accum' / state'
// is evaluated on the host, and the results are copied back into the
// output tensors allocated on the GPU place.
template <typename T>
struct FindMovingAverageAbsMaxFunctor<platform::CUDADeviceContext, T> {
  void operator()(const platform::CUDADeviceContext& ctx,
                  const framework::Tensor& in_accum,
                  const framework::Tensor& in_state, const T* cur_scale,
                  const float rate, framework::Tensor* out_state,
                  framework::Tensor* out_accum, framework::Tensor* out_scale) {
    const auto gpu_place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
    const platform::CPUPlace host_place = platform::CPUPlace();

    // Device -> host: fetch one T from each input.
    // NOTE(review): the trailing 0 is forwarded as in the original calls;
    // presumably it is the stream argument - confirm against memory::Copy.
    T host_accum;
    memory::Copy(host_place, &host_accum, gpu_place, in_accum.data<T>(),
                 sizeof(T), 0);
    T host_state;
    memory::Copy(host_place, &host_state, gpu_place, in_state.data<T>(),
                 sizeof(T), 0);
    T host_scale;
    memory::Copy(host_place, &host_scale, gpu_place, cur_scale, sizeof(T), 0);

    // Moving-average update, evaluated on the host.
    host_state = rate * host_state + 1;
    host_accum = rate * host_accum + host_scale;
    host_scale = host_accum / host_state;

    // Host -> device: publish the updated values.
    T* accum_dst = out_accum->mutable_data<T>(gpu_place);
    memory::Copy(gpu_place, accum_dst, host_place, &host_accum, sizeof(T), 0);
    T* state_dst = out_state->mutable_data<T>(gpu_place);
    memory::Copy(gpu_place, state_dst, host_place, &host_state, sizeof(T), 0);
    T* scale_dst = out_scale->mutable_data<T>(gpu_place);
    memory::Copy(gpu_place, scale_dst, host_place, &host_scale, sizeof(T), 0);
  }
};
template
struct
FindMovingAverageAbsMaxFunctor
<
platform
::
CUDADeviceContext
,
float
>;
template
<
typename
T
>
struct
ClipAndFakeQuantFunctor
<
platform
::
CUDADeviceContext
,
T
>
{
void
operator
()(
const
platform
::
CUDADeviceContext
&
ctx
,
...
...
@@ -178,3 +213,6 @@ REGISTER_OP_CUDA_KERNEL(fake_channel_wise_quantize_abs_max,
ops
::
FakeChannelWiseQuantizeAbsMaxKernel
<
CUDA
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
fake_quantize_range_abs_max
,
ops
::
FakeQuantizeRangeAbsMaxKernel
<
CUDA
,
float
>
);
REGISTER_OP_CUDA_KERNEL
(
fake_quantize_moving_average_abs_max
,
ops
::
FakeQuantizeMovingAverageAbsMaxKernel
<
CUDA
,
float
>
);
paddle/fluid/operators/fake_quantize_op.h
浏览文件 @
81b4fad8
...
...
@@ -42,12 +42,20 @@ struct FindRangeAbsMaxFunctor {
framework
::
Tensor
*
scales_arr
,
framework
::
Tensor
*
out_scale
);
};
// Primary template for the moving-average abs-max update. Only the
// declaration lives here; device-specific specializations provide the body.
//
// The signature matches the specializations and the kernel call site:
// `cur_scale` is a raw pointer to the freshly computed abs-max value and
// `rate` is the moving rate. The earlier declaration took `cur_scale` as a
// Tensor reference and had no `rate`, which no implementation or caller used.
//
//   ctx        device context the update runs on
//   in_accum   previous accumulator
//   in_state   previous state
//   cur_scale  pointer to the current abs-max sample
//   rate       moving rate applied to the previous accumulator and state
//   out_state  receives the updated state
//   out_accum  receives the updated accumulator
//   out_scale  receives the updated scale
template <typename DeviceContext, typename T>
struct FindMovingAverageAbsMaxFunctor {
  void operator()(const DeviceContext& ctx, const framework::Tensor& in_accum,
                  const framework::Tensor& in_state, const T* cur_scale,
                  const float rate, framework::Tensor* out_state,
                  framework::Tensor* out_accum, framework::Tensor* out_scale);
};
template
<
typename
DeviceContext
,
typename
T
>
class
FakeQuantizeAbsMaxKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
in
=
context
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
context
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
*
out_scale
=
context
.
Output
<
framework
::
Tensor
>
(
"OutScale"
);
T
*
out_s
=
out_scale
->
mutable_data
<
T
>
(
context
.
GetPlace
());
...
...
@@ -138,5 +146,54 @@ class FakeQuantizeRangeAbsMaxKernel : public framework::OpKernel<T> {
}
};
// Kernel for the moving-average abs-max fake quantization op.
//
// With is_test set, X is clipped and quantized using InScale. Otherwise the
// abs-max of X is computed, folded into the running accum/state pair, and X
// is clipped and quantized using the freshly written OutScale.
template <typename DeviceContext, typename T>
class FakeQuantizeMovingAverageAbsMaxKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<framework::Tensor>("X");
    auto* scale_in = context.Input<framework::Tensor>("InScale");
    auto* quantized = context.Output<framework::Tensor>("Out");
    quantized->mutable_data<T>(context.GetPlace());

    const bool inference_only = context.Attr<bool>("is_test");
    const int bits = context.Attr<int>("bit_length");
    // Largest positive quantized value: 2^(bits - 1) - 1.
    int bin_cnt = std::pow(2, bits - 1) - 1;
    auto& dev_ctx = context.template device_context<DeviceContext>();

    // Inference path: reuse the stored scale, no statistics are updated.
    if (inference_only) {
      ClipAndFakeQuantFunctor<DeviceContext, T>()(dev_ctx, *x, *scale_in,
                                                  bin_cnt, quantized);
      return;
    }

    // Training path.
    // NOTE(review): InAccum / InState / OutState / OutAccum are fetched and
    // dereferenced without a null check - confirm callers always bind them
    // when is_test is false.
    auto* accum_in = context.Input<framework::Tensor>("InAccum");
    auto* state_in = context.Input<framework::Tensor>("InState");

    // Scratch storage for a single T holding the abs-max of X.
    auto& allocator =
        platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx);
    auto abs_max_buffer = allocator.Allocate(1 * sizeof(T));
    T* abs_max = static_cast<T*>(abs_max_buffer->ptr());
    FindAbsMaxFunctor<DeviceContext, T>()(dev_ctx, x->data<T>(), x->numel(),
                                          abs_max);

    auto* state_out = context.Output<framework::Tensor>("OutState");
    auto* accum_out = context.Output<framework::Tensor>("OutAccum");
    auto* scale_out = context.Output<framework::Tensor>("OutScale");
    state_out->mutable_data<T>(context.GetPlace());
    accum_out->mutable_data<T>(context.GetPlace());
    scale_out->mutable_data<T>(context.GetPlace());

    const float moving_rate = context.Attr<float>("moving_rate");
    FindMovingAverageAbsMaxFunctor<DeviceContext, T>()(
        dev_ctx, *accum_in, *state_in, abs_max, moving_rate, state_out,
        accum_out, scale_out);

    // Quantize with the scale that was just produced.
    ClipAndFakeQuantFunctor<DeviceContext, T>()(dev_ctx, *x, *scale_out,
                                                bin_cnt, quantized);
  }
};
}
// namespace operators
}
// namespace paddle
python/paddle/fluid/contrib/quantize/quantize_transpiler.py
浏览文件 @
81b4fad8
...
...
@@ -84,7 +84,8 @@ class QuantizeTranspiler(object):
activation_bits
=
8
,
activation_quantize_type
=
'abs_max'
,
weight_quantize_type
=
'abs_max'
,
window_size
=
10000
):
window_size
=
10000
,
moving_rate
=
0.9
):
"""
Convert and rewrite the fluid Program according to weight and
activation quantization type.
...
...
@@ -117,23 +118,27 @@ class QuantizeTranspiler(object):
"""
self
.
weight_bits
=
weight_bits
self
.
activation_bits
=
activation_bits
quant_type
=
[
'abs_max'
,
'range_abs_max'
]
quant_type
=
[
'abs_max'
,
'range_abs_max'
,
'moving_average_abs_max'
]
if
weight_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown weight_quantize_type: '%s'. It can only be "
,
"'abs_max' or 'range_abs_max'."
,
str
(
weight_quantize_type
))
"'abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
,
str
(
weight_quantize_type
))
if
activation_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown activation_quantize_type : '%s'. It can only be "
,
"'abs_max' or 'range_abs_max'."
,
str
(
activation_quantize_type
))
"'abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
,
str
(
activation_quantize_type
))
self
.
weight_quantize_type
=
weight_quantize_type
self
.
activation_quantize_type
=
activation_quantize_type
self
.
window_size
=
window_size
self
.
moving_rate
=
moving_rate
self
.
helper
=
LayerHelper
(
self
.
__class__
.
__name__
)
self
.
fake_quant_op_types
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
,
'fake_quantize_moving_average_abs_max'
]
self
.
fake_dequant_op_types
=
[
'fake_dequantize_max_abs'
]
self
.
is_test
=
None
...
...
@@ -168,6 +173,7 @@ class QuantizeTranspiler(object):
block_id
=
block
.
idx
# insert quant op and dequant op
for
name
in
op
.
input_arg_names
:
#if share input between ops
if
name
in
dequanted_vars
[
block_id
]:
dequant_var
=
dequanted_vars
[
block_id
][
name
]
else
:
...
...
@@ -261,6 +267,7 @@ class QuantizeTranspiler(object):
max_range
=
None
scale_var
=
None
for
name
in
op
.
input_arg_names
:
#rename input name of the op to the input name of last op which has be removed
if
name
in
op_in_rename_map
[
block_id
]:
op
.
_rename_input
(
name
,
op_in_rename_map
[
block_id
][
name
])
...
...
@@ -272,8 +279,7 @@ class QuantizeTranspiler(object):
max_range
=
param_range
*
act_range
/
scale_v
else
:
assert
isinstance
(
scale_v
,
Variable
)
scale_var
=
var_scale_map
[
block_id
][
_original_var_name
(
name
)]
scale_var
=
scale_v
if
len
(
op
.
output_arg_names
)
!=
1
:
raise
ValueError
(
"Only support one output, but op %s has"
...
...
@@ -309,7 +315,7 @@ class QuantizeTranspiler(object):
op_type
=
op
.
type
# insert dequant_op after fc/conv, need to rename
# input of the followed ops
# input of the followed ops
(of fc/conv) to the dquant_op
for
name
in
op
.
input_arg_names
:
if
name
in
op_out_rename_map
[
block_id
]:
op
.
_rename_input
(
name
,
...
...
@@ -389,8 +395,8 @@ class QuantizeTranspiler(object):
for
op
in
block
.
ops
:
args
+=
op
.
input_arg_names
args
+=
op
.
output_arg_names
args
=
list
(
set
(
args
))
var_names
=
block
.
vars
.
keys
()
args
=
list
(
set
(
args
))
#vals of all left ops
var_names
=
block
.
vars
.
keys
()
# all vals
sub_block_remove_vars
=
[]
for
var
in
var_names
:
if
var
not
in
args
:
...
...
@@ -471,6 +477,61 @@ class QuantizeTranspiler(object):
return
quant_var
,
scale
def _insert_quant_moving_average_abs_max_op(self, block, idx, var,
                                            quant_bits):
    """Insert a fake_quantize_moving_average_abs_max op at ``idx`` in ``block``.

    Creates the quantized output variable for ``var``, two persistable
    one-element buffers ('state' and 'accum', both initialized to 1) and a
    non-trainable scale parameter initialized to 0.001. The state/accum
    buffers are wired as both inputs and outputs of the op only when
    ``self.is_test`` is falsy.

    Returns:
        tuple: ``(quant_var, scale)``.
    """
    quant_var = block.create_var(
        name=_quantized_var_name(var.name),
        type=var.type,
        shape=var.shape,
        dtype=var.dtype)

    def _new_unit_buffer(prefix):
        # Persistable [1]-shaped global variable, constant-initialized to 1.
        buf = self.helper.create_global_variable(
            name=unique_name.generate(prefix),
            persistable=True,
            dtype=var.dtype,
            shape=[1])
        self.helper.set_variable_initializer(
            buf, initializer=Constant(value=1))
        return buf

    state = _new_unit_buffer('state')
    accum = _new_unit_buffer('accum')

    scale_attr = ParamAttr(
        name=_quantized_scale_name(var.name),
        initializer=Constant(0.001),
        trainable=False)
    scale = self.helper.create_parameter(
        attr=scale_attr, shape=[1], dtype=var.dtype)
    scale.stop_gradient = True

    # The scale parameter is read as InScale and overwritten as OutScale.
    ins = {'X': var, 'InScale': scale}
    outs = {'Out': quant_var, 'OutScale': scale}
    if not self.is_test:
        # The running statistics are updated in place.
        ins['InState'] = state
        ins['InAccum'] = accum
        outs['OutState'] = state
        outs['OutAccum'] = accum

    attrs = {
        'bit_length': quant_bits,
        'moving_rate': self.moving_rate,
        'is_test': self.is_test
    }

    block._insert_op(
        idx,
        type='fake_quantize_moving_average_abs_max',
        attrs=attrs,
        inputs=ins,
        outputs=outs)

    return quant_var, scale
def
_insert_quant_op
(
self
,
block
,
idx
,
var
,
quant_bits
,
quant_type
):
"""
Insert fake_quantize_op
...
...
@@ -480,6 +541,9 @@ class QuantizeTranspiler(object):
elif
quant_type
==
'range_abs_max'
:
return
self
.
_insert_quant_range_abs_max_op
(
block
,
idx
,
var
,
quant_bits
)
elif
quant_type
==
'moving_average_abs_max'
:
return
self
.
_insert_quant_moving_average_abs_max_op
(
block
,
idx
,
var
,
quant_bits
)
def
_insert_dequant_op
(
self
,
block
,
idx
,
var
,
scale
,
quant_bits
):
"""
...
...
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
浏览文件 @
81b4fad8
...
...
@@ -38,7 +38,8 @@ class QuantizationTransformPass(object):
activation_bits
=
8
,
activation_quantize_type
=
'abs_max'
,
weight_quantize_type
=
'abs_max'
,
window_size
=
10000
):
window_size
=
10000
,
moving_rate
=
0.9
):
"""
Convert and rewrite the IrGraph according to weight and
activation quantization type.
...
...
@@ -83,19 +84,22 @@ class QuantizationTransformPass(object):
self
.
_weight_bits
=
weight_bits
self
.
_activation_bits
=
activation_bits
quant_type
=
[
'abs_max'
,
'range_abs_max'
]
quant_type
=
[
'abs_max'
,
'range_abs_max'
,
'moving_average_abs_max'
]
if
activation_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown activation_quantize_type : '%s'. It can only be "
,
"'abs_max' or 'range_abs_max'."
,
str
(
activation_quantize_type
))
"'abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
,
str
(
activation_quantize_type
))
if
weight_quantize_type
not
in
quant_type
:
raise
ValueError
(
"Unknown weight_quantize_type: '%s'. It can only be "
,
"'abs_max' or 'range_abs_max'."
,
str
(
weight_quantize_type
))
"'abs_max' or 'range_abs_max' or 'moving_average_abs_max'."
,
str
(
weight_quantize_type
))
self
.
_activation_quantize_type
=
activation_quantize_type
self
.
_weight_quantize_type
=
weight_quantize_type
self
.
_window_size
=
window_size
self
.
_moving_rate
=
moving_rate
self
.
_need_initialized
=
collections
.
OrderedDict
()
self
.
_quantizable_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'mul'
]
...
...
@@ -222,6 +226,9 @@ class QuantizationTransformPass(object):
elif
quant_type
==
'range_abs_max'
:
return
self
.
_insert_quant_range_abs_max_op
(
graph
,
var_node
,
quant_bits
)
elif
quant_type
==
'moving_average_abs_max'
:
return
self
.
_insert_quant_moving_average_abs_max_op
(
graph
,
var_node
,
quant_bits
)
def
_insert_quant_abs_max_op
(
self
,
graph
,
var_node
,
quant_bits
):
"""
...
...
@@ -309,6 +316,74 @@ class QuantizationTransformPass(object):
return
quant_var_node
,
scale_out_node
def _insert_quant_moving_average_abs_max_op(self, graph, var_node,
                                            quant_bits):
    """Insert a fake_quantize_moving_average_abs_max op node into ``graph``.

    Creates the quantized output node for ``var_node`` and a persistable
    scale node (registered for initialization to 0.001). When
    ``self._is_test`` is falsy it also creates persistable 'state' and
    'accum' nodes (registered for initialization to 1). It then creates the
    op node and links all inputs and outputs to it.

    Returns:
        tuple: ``(quant_var_node, scale_out_node)``.
    """
    lod_tensor = core.VarDesc.VarType.LOD_TENSOR

    quant_var_node = graph.create_var_node(
        name=self._quantized_var_name(var_node.name()),
        var_type=var_node.type(),
        shape=var_node.shape(),
        var_dtype=var_node.dtype())

    scale_in_node = graph.create_persistable_node(
        name=self._quantized_scale_name(var_node.name()),
        var_type=lod_tensor,
        shape=[1],
        var_dtype=var_node.dtype())
    self._need_initialized[scale_in_node.var()] = Constant(value=0.001)
    # The output scale node is created from the input scale's var desc.
    scale_out_node = graph.create_var_node_from_desc(scale_in_node.var())

    ins = {'X': var_node, 'InScale': scale_in_node}
    outs = {'Out': quant_var_node, 'OutScale': scale_out_node}

    # Extra nodes that only exist outside test mode, in link order.
    stat_in_nodes = []
    stat_out_nodes = []
    if not self._is_test:
        state_in_node = graph.create_persistable_node(
            name=unique_name.generate('state'),
            var_type=lod_tensor,
            var_dtype=var_node.dtype(),
            shape=[1])
        self._need_initialized[state_in_node.var()] = Constant(value=1)

        accum_in_node = graph.create_persistable_node(
            name=unique_name.generate('accum'),
            var_type=lod_tensor,
            var_dtype=var_node.dtype(),
            shape=[1])
        self._need_initialized[accum_in_node.var()] = Constant(value=1)

        state_out_node = graph.create_var_node_from_desc(state_in_node.var())
        accum_out_node = graph.create_var_node_from_desc(accum_in_node.var())

        ins['InState'] = state_in_node
        ins['InAccum'] = accum_in_node
        outs['OutState'] = state_out_node
        outs['OutAccum'] = accum_out_node

        stat_in_nodes = [state_in_node, accum_in_node]
        stat_out_nodes = [state_out_node, accum_out_node]

    attrs = {
        'bit_length': quant_bits,
        'moving_rate': self._moving_rate,
        'is_test': self._is_test,
        'op_role': core.op_proto_and_checker_maker.OpRole.Forward
    }

    quant_op_node = graph.create_op_node(
        op_type='fake_quantize_moving_average_abs_max',
        attrs=attrs,
        inputs=ins,
        outputs=outs)

    # Link order matches the original: data and scale first, then statistics.
    for src_node in (var_node, scale_in_node):
        graph.link_to(src_node, quant_op_node)
    for dst_node in (quant_var_node, scale_out_node):
        graph.link_to(quant_op_node, dst_node)
    for src_node in stat_in_nodes:
        graph.link_to(src_node, quant_op_node)
    for dst_node in stat_out_nodes:
        graph.link_to(quant_op_node, dst_node)

    return quant_var_node, scale_out_node
def
_insert_dequant_op
(
self
,
graph
,
var_node
,
scale_var_node
,
quant_bits
):
"""
Insert fake_dequantize_op in the graph.
...
...
@@ -389,7 +464,8 @@ class QuantizationFreezePass(object):
self
.
_weight_quantize_type
=
weight_quantize_type
self
.
_quantizable_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'mul'
]
self
.
_fake_quant_op_names
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
,
'fake_quantize_moving_average_abs_max'
]
self
.
_fake_dequant_op_names
=
[
'fake_dequantize_max_abs'
]
self
.
_op_input_rename_map
=
collections
.
OrderedDict
()
...
...
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
浏览文件 @
81b4fad8
...
...
@@ -164,6 +164,9 @@ class TestQuantizationTransformPass(unittest.TestCase):
def
test_linear_fc_quant_range_abs_max
(
self
):
self
.
linear_fc_quant
(
'range_abs_max'
,
for_ci
=
True
)
def test_linear_fc_quant_moving_average_abs_max(self):
    """Run linear_fc_quant with 'moving_average_abs_max' and for_ci=True."""
    quant_type = 'moving_average_abs_max'
    self.linear_fc_quant(quant_type, for_ci=True)
def
residual_block_quant
(
self
,
quant_type
,
for_ci
=
False
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
...
...
@@ -201,6 +204,9 @@ class TestQuantizationTransformPass(unittest.TestCase):
def
test_residual_block_range_abs_max
(
self
):
self
.
residual_block_quant
(
'range_abs_max'
,
for_ci
=
True
)
def test_residual_block_moving_average_abs_max(self):
    """Run residual_block_quant with 'moving_average_abs_max' and for_ci=True."""
    quant_type = 'moving_average_abs_max'
    self.residual_block_quant(quant_type, for_ci=True)
class
TestQuantizationFreezePass
(
unittest
.
TestCase
):
def
freeze_graph
(
self
,
use_cuda
,
seed
,
quant_type
,
for_ci
=
False
):
...
...
@@ -380,11 +386,18 @@ class TestQuantizationFreezePass(unittest.TestCase):
with
fluid
.
unique_name
.
guard
():
self
.
freeze_graph
(
True
,
seed
=
1
,
quant_type
=
'range_abs_max'
,
for_ci
=
True
)
self
.
freeze_graph
(
True
,
seed
=
1
,
quant_type
=
'moving_average_abs_max'
,
for_ci
=
True
)
def test_freeze_graph_cpu_static(self):
    """Freeze the graph with use_cuda=False for each scale-tracking quant type.

    Both runs share one unique_name guard and use seed=2 with for_ci=True.
    """
    with fluid.unique_name.guard():
        for quant_type in ('range_abs_max', 'moving_average_abs_max'):
            self.freeze_graph(
                False, seed=2, quant_type=quant_type, for_ci=True)
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_fake_quantize_op.py
浏览文件 @
81b4fad8
...
...
@@ -17,6 +17,7 @@ from __future__ import print_function
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
import
paddle.fluid.core
as
core
class
TestFakeQuantizeOp
(
OpTest
):
...
...
@@ -75,6 +76,7 @@ class TestFakeQuantizeRangeAbsMaxOp(OpTest):
'InScale'
:
np
.
zeros
(
1
).
astype
(
"float32"
)
}
scale
=
np
.
max
(
np
.
abs
(
self
.
inputs
[
'X'
])).
astype
(
"float32"
)
out_scales
=
np
.
zeros
(
self
.
attrs
[
'window_size'
]).
astype
(
"float32"
)
out_scales
[
0
]
=
scale
self
.
outputs
=
{
...
...
@@ -88,6 +90,46 @@ class TestFakeQuantizeRangeAbsMaxOp(OpTest):
self
.
check_output
()
class TestFakeQuantizeMovingOp(OpTest):
    """Output check for fake_quantize_moving_average_abs_max in training mode.

    The expected values are computed with NumPy from a random X, an initial
    accum/state of 1 and an initial scale of 0.001.
    """

    def setUp(self):
        self.op_type = "fake_quantize_moving_average_abs_max"
        self.attrs = {
            'bit_length': int(5),
            'moving_rate': float(0.9),
            'is_test': False
        }
        moving_rate = self.attrs['moving_rate']
        bit_length = self.attrs['bit_length']

        # One-element float32 buffers fed to the op.
        in_accum = np.ones(1).astype("float32")
        in_state = np.ones(1).astype("float32")
        in_scale = np.array([0.001]).astype("float32")

        self.inputs = {
            'X': np.random.random((8, 16, 7, 7)).astype("float32"),
            'InScale': in_scale,
            'InAccum': in_accum,
            'InState': in_state,
        }
        x = self.inputs['X']

        # Reference update: accum and state decay by moving_rate, then the
        # current abs-max (resp. 1) is added; the scale is their ratio.
        out_accum = np.zeros(1).astype("float32")
        out_state = np.zeros(1).astype("float32")
        out_accum[0] = moving_rate * in_accum[0] + np.max(
            np.abs(x)).astype("float32")
        out_state[0] = moving_rate * in_state[0] + 1
        out_scale = out_accum / out_state

        # Largest positive quantized value for the configured bit length.
        bin_cnt = (1 << (bit_length - 1)) - 1

        self.outputs = {
            'Out': np.round(x / out_scale * bin_cnt),
            'OutAccum': out_accum,
            'OutState': out_state,
            'OutScale': out_scale,
        }

    def test_check_output(self):
        self.check_output()
class
TestFakeQuantizeRangeAbsMaxOp2
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"fake_quantize_range_abs_max"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录