Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
c64f2204
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c64f2204
编写于
1月 26, 2019
作者:
W
WangZhen
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add convert_to_int8 pass and transform_for_mobile pass and their UTs.
上级
c8095eeb
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
207 addition
and
60 deletion
+207
-60
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
...ddle/fluid/contrib/slim/quantization/quantization_pass.py
+102
-4
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
...paddle/fluid/contrib/slim/tests/test_quantization_pass.py
+84
-51
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
...on/paddle/fluid/contrib/tests/test_quantize_transpiler.py
+21
-5
未找到文件。
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
浏览文件 @
c64f2204
...
...
@@ -21,7 +21,10 @@ from ....framework import Program
from
....initializer
import
Constant
from
....
import
unique_name
__all__
=
[
'QuantizationTransformPass'
,
'QuantizationFreezePass'
]
__all__
=
[
'QuantizationTransformPass'
,
'QuantizationFreezePass'
,
'ConvertToInt8Pass'
,
'TransformForMobilePass'
]
class
QuantizationTransformPass
(
object
):
...
...
@@ -394,6 +397,7 @@ class QuantizationFreezePass(object):
# remove the unused var node in the graph
self
.
_remove_unused_var_nodes
(
graph
)
return
graph
def
_remove_fake_quant_and_dequant_op
(
self
,
graph
,
op_node
):
k
=
op_node
.
op
().
output
(
'Out'
)[
0
]
...
...
@@ -453,9 +457,9 @@ class QuantizationFreezePass(object):
def
_load_var
(
self
,
name
):
return
np
.
array
(
self
.
_scope
.
find_var
(
name
).
get_tensor
())
def
_restore_var
(
self
,
name
,
arr
):
t
=
self
.
_scope
.
find_var
(
name
).
get_tensor
()
t
.
set
(
arr
,
self
.
_place
)
def
_restore_var
(
self
,
name
,
arr
ay
):
t
ensor
=
self
.
_scope
.
find_var
(
name
).
get_tensor
()
t
ensor
.
set
(
array
,
self
.
_place
)
def
_remove_unused_var_nodes
(
self
,
graph
):
all_used_vars
=
set
()
...
...
@@ -496,3 +500,97 @@ class QuantizationFreezePass(object):
def
_quant
(
self
,
x
,
scale
,
num_bits
):
return
np
.
round
(
x
/
scale
*
((
1
<<
(
num_bits
-
1
))
-
1
))
class
ConvertToInt8Pass
(
object
):
def
__init__
(
self
,
scope
,
place
):
assert
scope
is
not
None
,
\
'The scope cannot be set None.'
assert
place
is
not
None
,
\
'The place cannot be set None.'
self
.
_scope
=
scope
self
.
_place
=
place
self
.
_quantizable_ops
=
[
'conv2d'
,
'depthwise_conv2d'
,
'mul'
]
def
apply
(
self
,
graph
):
persistable_vars
=
[
p
.
name
()
for
p
in
graph
.
all_persistable_vars
()]
ops
=
graph
.
all_ops
()
input_map
=
{}
for
op_node
in
ops
:
op_name
=
op_node
.
name
()
if
op_name
in
self
.
_quantizable_ops
:
for
var_node
in
op_node
.
inputs
:
name
=
var_node
.
name
()
if
name
in
persistable_vars
:
if
name
not
in
input_map
:
int8_var_node
=
self
.
_convert_to_int8
(
graph
,
var_node
)
input_map
[
name
]
=
int8_var_node
graph
.
update_input_link
(
var_node
,
input_map
[
name
],
op_node
)
# remove the unused var node in the graph
self
.
_remove_unused_var_nodes
(
graph
)
return
graph
def
_convert_to_int8
(
self
,
graph
,
var_node
):
int8_var_node_name
=
var_node
.
name
()
+
".int8"
int8_var_node
=
graph
.
create_param_node
(
name
=
cpt
.
to_text
(
int8_var_node_name
),
var_type
=
var_node
.
var
().
type
(),
shape
=
var_node
.
var
().
shape
(),
var_dtype
=
core
.
VarDesc
.
VarType
.
INT8
)
array
=
self
.
_load_var
(
var_node
.
name
())
self
.
_scope
.
var
(
int8_var_node_name
)
self
.
_store_var
(
int8_var_node_name
,
array
,
np
.
int8
)
return
int8_var_node
def
_load_var
(
self
,
name
):
return
np
.
array
(
self
.
_scope
.
find_var
(
name
).
get_tensor
())
def
_store_var
(
self
,
name
,
array
,
dtype
):
tensor
=
self
.
_scope
.
find_var
(
name
).
get_tensor
()
tensor
.
set
(
array
.
astype
(
dtype
),
self
.
_place
)
def
_remove_unused_var_nodes
(
self
,
graph
):
all_used_vars
=
set
()
ops
=
graph
.
all_ops
()
for
op_node
in
ops
:
for
input_node
in
op_node
.
inputs
:
all_used_vars
.
add
(
input_node
)
for
output_node
in
op_node
.
outputs
:
all_used_vars
.
add
(
output_node
)
all_unused_vars
=
graph
.
all_vars
()
-
all_used_vars
graph
.
safe_remove_nodes
(
all_unused_vars
)
class
TransformForMobilePass
(
object
):
def
__init__
(
self
):
self
.
_fake_quant_op_names
=
[
'fake_quantize_abs_max'
,
'fake_quantize_range_abs_max'
]
self
.
_fake_dequant_op_names
=
[
'fake_dequantize_max_abs'
]
def
apply
(
self
,
graph
):
ops
=
graph
.
all_ops
()
for
op_node
in
ops
:
name
=
op_node
.
name
()
if
name
in
self
.
_fake_quant_op_names
:
op_node
.
op
().
set_type
(
'quantize'
)
quant_node
=
graph
.
create_op_node_from_desc
(
op_node
.
op
())
for
input_node
in
op_node
.
inputs
:
graph
.
link_to
(
input_node
,
quant_node
)
for
output_node
in
op_node
.
outputs
:
graph
.
link_to
(
quant_node
,
output_node
)
graph
.
safe_remove_nodes
(
op_node
)
if
name
in
self
.
_fake_dequant_op_names
:
op_node
.
op
().
set_type
(
'dequantize'
)
dequant_node
=
graph
.
create_op_node_from_desc
(
op_node
.
op
())
for
input_node
in
op_node
.
inputs
:
graph
.
link_to
(
input_node
,
dequant_node
)
for
output_node
in
op_node
.
outputs
:
graph
.
link_to
(
dequant_node
,
output_node
)
graph
.
safe_remove_nodes
(
op_node
)
return
graph
python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py
浏览文件 @
c64f2204
...
...
@@ -18,10 +18,11 @@ import numpy as np
import
paddle.fluid
as
fluid
import
six
import
paddle
from
paddle.fluid.framework
import
Program
from
paddle.fluid.framework
import
IrGraph
from
paddle.fluid.contrib.slim.quantization
import
QuantizationTransformPass
from
paddle.fluid.contrib.slim.quantization
import
QuantizationFreezePass
from
paddle.fluid.contrib.slim.quantization
import
ConvertToInt8Pass
from
paddle.fluid.contrib.slim.quantization
import
TransformForMobilePass
from
paddle.fluid
import
core
...
...
@@ -233,10 +234,22 @@ class TestQuantizationFreezePass(unittest.TestCase):
scope
=
scope
,
program_exe
=
exe
,
activation_quantize_type
=
quant_type
)
transform_pass
.
apply
(
main_graph
)
transform_pass
.
apply
(
test_graph
)
dev_name
=
'_gpu_'
if
use_cuda
else
'_cpu_'
marked_nodes
=
set
()
for
op
in
main_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
marked_nodes
.
add
(
op
)
main_graph
.
draw
(
'.'
,
'main'
+
dev_name
+
quant_type
,
marked_nodes
)
marked_nodes
=
set
()
for
op
in
test_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test'
+
dev_name
+
quant_type
,
marked_nodes
)
quantized_main_program
=
main_graph
.
to_program
()
quantized_test_program
=
test_graph
.
to_program
()
iters
=
5
batch_size
=
8
dev_name
=
'_gpu_'
if
use_cuda
else
'_cpu_'
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
...
...
@@ -248,66 +261,86 @@ class TestQuantizationFreezePass(unittest.TestCase):
with
fluid
.
scope_guard
(
scope
):
for
_
in
range
(
iters
):
data
=
next
(
train_reader
())
loss_v
=
exe
.
run
(
program
=
main_graph
.
to_program
()
,
loss_v
=
exe
.
run
(
program
=
quantized_main_program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
])
print
(
'{}: {}'
.
format
(
dev_nam
e
,
loss_v
))
print
(
'{}: {}'
.
format
(
'loss'
+
dev_name
+
quant_typ
e
,
loss_v
))
test_data
=
next
(
test_reader
())
with
fluid
.
program_guard
(
quantized_test_program
):
w_var
=
fluid
.
framework
.
_get_var
(
'conv2d_1.w_0.quantized'
,
quantized_test_program
)
# Testing
with
fluid
.
scope_guard
(
scope
):
test_loss1
,
w_quant
=
exe
.
run
(
program
=
quantized_test_program
,
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
loss
,
w_var
])
# Freeze graph for inference, but the weight of fc/conv is still float type.
freeze_pass
=
QuantizationFreezePass
(
scope
=
scope
,
place
=
place
)
freeze_pass
.
apply
(
test_graph
)
marked_nodes
=
set
()
for
op
in
main
_graph
.
all_ops
():
for
op
in
test
_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
marked_nodes
.
add
(
op
)
main_graph
.
draw
(
'.'
,
'main'
+
dev_name
+
quant_type
,
marked_nodes
)
test_graph
.
draw
(
'.'
,
'test_freeze'
+
dev_name
+
quant_type
,
marked_nodes
)
freeze_pass
=
QuantizationFreezePass
(
scope
=
scope
,
place
=
place
)
origin_marked_nodes
=
set
()
server_program
=
test_graph
.
to_program
()
with
fluid
.
scope_guard
(
scope
):
test_loss2
,
=
exe
.
run
(
program
=
server_program
,
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
loss
])
self
.
assertAlmostEqual
(
test_loss1
,
test_loss2
,
delta
=
5e-3
)
print
(
'{}: {}'
.
format
(
'test_loss1'
+
dev_name
+
quant_type
,
test_loss1
))
print
(
'{}: {}'
.
format
(
'test_loss2'
+
dev_name
+
quant_type
,
test_loss2
))
w_freeze
=
np
.
array
(
scope
.
find_var
(
'conv2d_1.w_0'
).
get_tensor
())
# Maybe failed, this is due to the calculation precision
self
.
assertAlmostEqual
(
np
.
sum
(
w_freeze
),
np
.
sum
(
w_quant
))
print
(
'{}: {}'
.
format
(
'w_freeze'
+
dev_name
+
quant_type
,
np
.
sum
(
w_freeze
)))
print
(
'{}: {}'
.
format
(
'w_quant'
+
dev_name
+
quant_type
,
np
.
sum
(
w_quant
)))
# Convert parameter to 8-bit.
convert_int8_pass
=
ConvertToInt8Pass
(
scope
=
scope
,
place
=
place
)
convert_int8_pass
.
apply
(
test_graph
)
marked_nodes
=
set
()
for
op
in
test_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
origin_marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test_origin'
+
dev_name
+
quant_type
,
origin_marked_nodes
)
freeze_pass
.
apply
(
test_graph
)
freeze_marked_nodes
=
set
()
marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test_int8'
+
dev_name
+
quant_type
,
marked_nodes
)
server_program_int8
=
test_graph
.
to_program
()
# Save the 8-bit parameter and model file.
with
fluid
.
scope_guard
(
scope
):
fluid
.
io
.
save_inference_model
(
'server_int8'
+
dev_name
+
quant_type
,
[
'image'
,
'label'
],
[
loss
],
exe
,
server_program_int8
)
# Test whether the 8-bit parameter and model file can be loaded successfully.
[
infer
,
feed
,
fetch
]
=
fluid
.
io
.
load_inference_model
(
'server_int8'
+
dev_name
+
quant_type
,
exe
)
# Check the loaded 8-bit weight.
w_8bit
=
np
.
array
(
scope
.
find_var
(
'conv2d_1.w_0.int8'
).
get_tensor
())
self
.
assertEqual
(
w_8bit
.
dtype
,
np
.
int8
)
self
.
assertEqual
(
np
.
sum
(
w_8bit
),
np
.
sum
(
w_freeze
))
print
(
'{}: {}'
.
format
(
'w_8bit'
+
dev_name
+
quant_type
,
np
.
sum
(
w_8bit
)))
print
(
'{}: {}'
.
format
(
'w_freeze'
+
dev_name
+
quant_type
,
np
.
sum
(
w_freeze
)))
mobile_pass
=
TransformForMobilePass
()
mobile_pass
.
apply
(
test_graph
)
marked_nodes
=
set
()
for
op
in
test_graph
.
all_ops
():
if
op
.
name
().
find
(
'quantize'
)
>
-
1
:
freeze_marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test_freeze'
+
dev_name
+
quant_type
,
freeze_marked_nodes
)
# with fluid.program_guard(test_program):
# test_data = next(test_reader())
# w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
# test_program)
# # Testing during training
# test_loss1, w_quant = exe.run(program=test_program,
# feed=feeder.feed(test_data),
# fetch_list=[loss, w_var])
# # Freeze program for inference, but the weight of fc/conv is still float type.
# quant_transpiler.freeze_program(test_program, place)
# test_loss2, = exe.run(program=test_program,
# feed=feeder.feed(test_data),
# fetch_list=[loss])
# self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
# w_freeze = np.array(fluid.global_scope().find_var('conv2d_1.w_0')
# .get_tensor())
# # fail: -432.0 != -433.0, this is due to the calculation precision
# #self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
# # Convert parameter to 8-bit.
# quant_transpiler.convert_to_int8(test_program, place)
# # Save the 8-bit parameter and model file.
# fluid.io.save_inference_model('model_8bit', ['image', 'label'],
# [loss], exe, test_program)
# # Test whether the 8-bit parameter and model file can be loaded successfully.
# [infer, feed, fetch] = fluid.io.load_inference_model('model_8bit',
# exe)
# # Check the loaded 8-bit weight.
# w_8bit = np.array(fluid.global_scope().find_var('conv2d_1.w_0.int8')
# .get_tensor())
# self.assertEqual(w_8bit.dtype, np.int8)
# self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
marked_nodes
.
add
(
op
)
test_graph
.
draw
(
'.'
,
'test_mobile'
+
dev_name
+
quant_type
,
marked_nodes
)
mobile_program
=
test_graph
.
to_program
()
with
fluid
.
scope_guard
(
scope
):
fluid
.
io
.
save_inference_model
(
'mobile_int8'
+
dev_name
+
quant_type
,
[
'image'
,
'label'
],
[
loss
],
exe
,
mobile_program
)
def
test_freeze_program_cuda_dynamic
(
self
):
if
fluid
.
core
.
is_compiled_with_cuda
():
...
...
python/paddle/fluid/contrib/tests/test_quantize_transpiler.py
浏览文件 @
c64f2204
...
...
@@ -204,9 +204,11 @@ class TestQuantizeTranspiler(unittest.TestCase):
build_program
(
test_program
,
startup
,
True
)
test_program
=
test_program
.
clone
(
for_test
=
True
)
quant_transpiler
=
QuantizeTranspiler
()
quant_transpiler
.
training_transpile
(
main
)
quant_transpiler
.
training_transpile
(
test_program
)
quant_type
=
'abs_max'
quant_transpiler
=
QuantizeTranspiler
(
activation_quantize_type
=
quant_type
)
quant_transpiler
.
training_transpile
(
main
,
startup
)
quant_transpiler
.
training_transpile
(
test_program
,
startup
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
...
...
@@ -223,12 +225,14 @@ class TestQuantizeTranspiler(unittest.TestCase):
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
feeder
=
fluid
.
DataFeeder
(
feed_list
=
feeds
,
place
=
place
)
dev_name
=
'_gpu_'
if
use_cuda
else
'_cpu_'
with
fluid
.
program_guard
(
main
):
for
_
in
range
(
iters
):
data
=
next
(
train_reader
())
loss_v
=
exe
.
run
(
program
=
main
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
loss
])
print
(
'{}: {}'
.
format
(
'loss'
+
dev_name
+
quant_type
,
loss_v
))
with
fluid
.
program_guard
(
test_program
):
test_data
=
next
(
test_reader
())
...
...
@@ -245,11 +249,19 @@ class TestQuantizeTranspiler(unittest.TestCase):
feed
=
feeder
.
feed
(
test_data
),
fetch_list
=
[
loss
])
self
.
assertAlmostEqual
(
test_loss1
,
test_loss2
,
delta
=
5e-3
)
print
(
'{}: {}'
.
format
(
'test_loss1'
+
dev_name
+
quant_type
,
test_loss1
))
print
(
'{}: {}'
.
format
(
'test_loss2'
+
dev_name
+
quant_type
,
test_loss2
))
w_freeze
=
np
.
array
(
fluid
.
global_scope
().
find_var
(
'conv2d_1.w_0'
)
.
get_tensor
())
# fail: -432.0 != -433.0, this is due to the calculation precision
#self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
print
(
'{}: {}'
.
format
(
'w_freeze'
+
dev_name
+
quant_type
,
np
.
sum
(
w_freeze
)))
print
(
'{}: {}'
.
format
(
'w_quant'
+
dev_name
+
quant_type
,
np
.
sum
(
w_quant
)))
# Convert parameter to 8-bit.
quant_transpiler
.
convert_to_int8
(
test_program
,
place
)
# Save the 8-bit parameter and model file.
...
...
@@ -264,13 +276,17 @@ class TestQuantizeTranspiler(unittest.TestCase):
self
.
assertEqual
(
w_8bit
.
dtype
,
np
.
int8
)
self
.
assertEqual
(
np
.
sum
(
w_8bit
),
np
.
sum
(
w_freeze
))
print
(
'{}: {}'
.
format
(
'w_8bit'
+
dev_name
+
quant_type
,
np
.
sum
(
w_8bit
)))
print
(
'{}: {}'
.
format
(
'w_freeze'
+
dev_name
+
quant_type
,
np
.
sum
(
w_freeze
)))
def
not_
test_freeze_program_cuda
(
self
):
def
test_freeze_program_cuda
(
self
):
if
fluid
.
core
.
is_compiled_with_cuda
():
with
fluid
.
unique_name
.
guard
():
self
.
freeze_program
(
True
,
seed
=
1
)
def
not_
test_freeze_program_cpu
(
self
):
def
test_freeze_program_cpu
(
self
):
with
fluid
.
unique_name
.
guard
():
self
.
freeze_program
(
False
,
seed
=
2
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录