Commit 64f9442b (unverified)

Merge branch 'develop' into pact_clip

Authored by Bai Yifan on Aug 10, 2020; committed via GitHub on Aug 10, 2020.
Parents: 14f1d583, a45431c9

Showing 8 changed files with 47 additions and 47 deletions (+47 −47):
- demo/quant/pact_quant_aware/README.md (+2 −2)
- demo/quant/pact_quant_aware/train.py (+25 −7)
- docs/zh_cn/quick_start/quant_aware_tutorial.md (+3 −3)
- docs/zh_cn/tutorials/image_classification_mkldnn_quant_tutorial.md (+1 −1)
- paddleslim/models/util.py (+1 −1)
- paddleslim/prune/group_param.py (+4 −3)
- paddleslim/prune/prune_walker.py (+3 −26)
- tests/test_prune.py (+8 −4)
demo/quant/pact_quant_aware/README.md

```diff
@@ -159,7 +159,7 @@ compiled_train_prog = compiled_train_prog.with_data_parallel(
 Plain quantization:
-python train.py --model MobileNetV3_large_x1_0 --pretrained_model ./pretrain/MobileNetV3_large_x1_0_ssld_pretrained --checkpoint_dir ./output/MobileNetV3_large_x1_0 --num_epochs 30 --lr 0.0001 --use_pact False
+python train.py --model MobileNetV3_large_x1_0 --pretrained_model ./pretrain/MobileNetV3_large_x1_0_ssld_pretrained --num_epochs 30 --lr 0.0001 --use_pact False
```

```diff
@@ -179,7 +179,7 @@ python train.py --model MobileNetV3_large_x1_0 --pretrained_model ./pretrain/Mob
 Quantization-aware training with PACT:
-python train.py --model MobileNetV3_large_x1_0 --pretrained_model ./pretrain/MobileNetV3_large_x1_0_ssld_pretrained --checkpoint_dir ./output/MobileNetV3_large_x1_0 --num_epochs 30 --lr 0.0001 --use_pact True --batch_size 128 --lr_strategy=piecewise_decay --step_epochs 20 --l2_decay 1e-5
+python train.py --model MobileNetV3_large_x1_0 --pretrained_model ./pretrain/MobileNetV3_large_x1_0_ssld_pretrained --num_epochs 30 --lr 0.0001 --use_pact True --batch_size 128 --lr_strategy=piecewise_decay --step_epochs 20 --l2_decay 1e-5
 The output is:
```
demo/quant/pact_quant_aware/train.py

```diff
@@ -55,8 +55,12 @@ add_arg('data', str, "imagenet", "Which data to use. 'mnist' or 'imagenet'")
 add_arg('log_period', int, 10, "Log period in batches.")
-add_arg('checkpoint_dir', str, "output", "checkpoint save dir")
+add_arg('checkpoint_dir', str, None, "checkpoint dir")
+add_arg('checkpoint_epoch', int, None, "checkpoint epoch")
+add_arg('output_dir', str, "output/MobileNetV3_large_x1_0", "model save dir")
 add_arg('use_pact', bool, True, "Whether to use PACT or not.")
```
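The new `checkpoint_dir` / `checkpoint_epoch` pair makes resuming an explicit opt-in: both must be set together, otherwise training starts from scratch and checkpoints go to `output_dir`. A hypothetical resume invocation, assuming a previous run saved its epoch-10 checkpoint under the default `output_dir`:

```
python train.py --model MobileNetV3_large_x1_0 \
    --pretrained_model ./pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
    --checkpoint_dir ./output/MobileNetV3_large_x1_0/10 --checkpoint_epoch 10 \
    --num_epochs 30 --lr 0.0001 --use_pact True
```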
```diff
@@ -288,6 +292,7 @@ def compress(args):
             compiled_train_prog,
             feed=data,
             fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
         end_time = time.time()
         loss_n = np.mean(loss_n)
         acc_top1_n = np.mean(acc_top1_n)
```
```diff
@@ -322,24 +327,37 @@ def compress(args):
     # train loop
     best_acc1 = 0.0
     best_epoch = 0
-    for i in range(args.num_epochs):
+    start_epoch = 0
+    if args.checkpoint_dir is not None:
+        ckpt_path = args.checkpoint_dir
+        assert args.checkpoint_epoch is not None, \
+            "checkpoint_epoch must be set"
+        start_epoch = args.checkpoint_epoch
+        fluid.io.load_persistables(
+            exe, dirname=args.checkpoint_dir, main_program=val_program)
+        start_step = start_epoch * int(
+            math.ceil(float(args.total_images) / args.batch_size))
+        v = fluid.global_scope().find_var('@LR_DECAY_COUNTER@').get_tensor()
+        v.set(np.array([start_step]).astype(np.float32), place)
+
+    for i in range(start_epoch, args.num_epochs):
         train(i, compiled_train_prog)
         acc1 = test(i, val_program)
         fluid.io.save_persistables(
             exe,
-            dirname=os.path.join(args.checkpoint_dir, str(i)),
+            dirname=os.path.join(args.output_dir, str(i)),
             main_program=val_program)
         if acc1 > best_acc1:
             best_acc1 = acc1
             best_epoch = i
             fluid.io.save_persistables(
                 exe,
-                dirname=os.path.join(args.checkpoint_dir, 'best_model'),
+                dirname=os.path.join(args.output_dir, 'best_model'),
                 main_program=val_program)
-    if os.path.exists(os.path.join(args.checkpoint_dir, 'best_model')):
+    if os.path.exists(os.path.join(args.output_dir, 'best_model')):
         fluid.io.load_persistables(
             exe,
-            dirname=os.path.join(args.checkpoint_dir, 'best_model'),
+            dirname=os.path.join(args.output_dir, 'best_model'),
             main_program=val_program)
     # 3. Freeze the graph after training by adjusting the quantize
```
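One subtlety in the resume path above is the write to `@LR_DECAY_COUNTER@`: fluid's `piecewise_decay` schedule counts global steps in that variable, so a resumed run must fast-forward it past the epochs already completed. A minimal sketch of just that arithmetic, with hypothetical `total_images` and `batch_size` values:

```python
import math

# Hypothetical values; in train.py they come from --total_images and --batch_size.
total_images = 1281167  # e.g. ImageNet train-set size
batch_size = 128
start_epoch = 10        # the value passed as --checkpoint_epoch

# Steps consumed by the completed epochs. Writing this into
# @LR_DECAY_COUNTER@ lets the piecewise schedule resume at the
# correct boundary instead of restarting from step 0.
steps_per_epoch = int(math.ceil(float(total_images) / batch_size))
start_step = start_epoch * steps_per_epoch
print(steps_per_epoch, start_step)  # 10010 100100
```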
docs/zh_cn/quick_start/quant_aware_tutorial.md

```diff
@@ -126,14 +126,14 @@ test(val_quant_program)
 float_prog, int8_prog = slim.quant.convert(val_quant_program, exe.place, save_int8=True)
-target_vars = [float_prog.global_block().var(name) for name in outputs]
+target_vars = [float_prog.global_block().var(outputs[-1])]
 fluid.io.save_inference_model(dirname='./inference_model/float',
-        feeded_var_names=[var.name for var in inputs],
+        feeded_var_names=[inputs[0].name],
         target_vars=target_vars,
         executor=exe,
         main_program=float_prog)
 fluid.io.save_inference_model(dirname='./inference_model/int8',
-        feeded_var_names=[var.name for var in inputs],
+        feeded_var_names=[inputs[0].name],
         target_vars=target_vars,
         executor=exe,
         main_program=int8_prog)
```
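For reference, a model saved this way can be read back with `fluid.io.load_inference_model`; a minimal sketch, assuming PaddlePaddle 1.x and the `./inference_model/float` path used above:

```python
import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())

# Returns the pruned inference program plus the feed names and
# fetch targets recorded by save_inference_model above.
infer_prog, feed_names, fetch_targets = fluid.io.load_inference_model(
    dirname='./inference_model/float', executor=exe)
```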
docs/zh_cn/tutorials/image_classification_mkldnn_quant_tutorial.md

```diff
 # Deploying quantized models on Intel CPUs
-On Intel Cascade Lake machines (e.g. Intel(R) Xeon(R) Gold 6271), with quantization and DNNL acceleration, INT8 models reach 3-3.7x the single-thread performance of FP32 models; on Intel Skylake machines (e.g. Intel(R) Xeon(R) Gold 6148), single-thread performance is 1.5x FP32, with only a negligible accuracy drop. For a sample image-classification quantization tutorial, see [Optimized CPU deployment and inference of INT8 image classification models](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/demo/mkldnn_quant/README.md). For quantizing NLP models, see [Reproducing ERNIE INT8 accuracy and performance](https://github.com/PaddlePaddle/benchmark/tree/master/Inference/c%2B%2B/ernie/mkldnn).
+On Intel Cascade Lake machines (e.g. Intel(R) Xeon(R) Gold 6271), with quantization and DNNL acceleration, INT8 models reach 3-3.7x the single-thread performance of FP32 models; on Intel Skylake machines (e.g. Intel(R) Xeon(R) Gold 6148), single-thread performance is 1.5x FP32, with only a negligible accuracy drop. For a sample image-classification quantization tutorial, see [Optimized CPU deployment and inference of INT8 image classification models](https://github.com/PaddlePaddle/PaddleSlim/tree/develop/demo/mkldnn_quant/). For quantizing NLP models, see [Reproducing ERNIE INT8 accuracy and performance](https://github.com/PaddlePaddle/benchmark/tree/master/Inference/c%2B%2B/ernie/mkldnn).
 ## Accuracy and performance of INT8 image-classification models on Xeon(R) 6271
```
paddleslim/models/util.py

```diff
@@ -29,4 +29,4 @@ def image_classification(model, image_shape, class_num, use_gpu=False):
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
-    return exe, train_program, val_program, (image, label), (acc_top1.name, acc_top5.name, avg_cost.name)
+    return exe, train_program, val_program, (image, label), (acc_top1.name, acc_top5.name, avg_cost.name, out.name)
```
paddleslim/prune/group_param.py

```diff
@@ -14,7 +14,7 @@
 # limitations under the License.
 from ..core import GraphWrapper
-from .prune_walker import conv2d as conv2d_walker
+from .prune_walker import PRUNE_WORKER

 __all__ = ["collect_convs"]
```

```diff
@@ -55,8 +55,9 @@ def collect_convs(params, graph, visited={}):
         pruned_params = []
         param = graph.var(param)
         conv_op = param.outputs()[0]
-        walker = conv2d_walker(
-            conv_op, pruned_params=pruned_params, visited=visited)
+        cls = PRUNE_WORKER.get(conv_op.type())
+        walker = cls(
+            conv_op, pruned_params=pruned_params, visited=visited)
         walker.prune(param, pruned_axis=0, pruned_idx=[0])
         groups.append(pruned_params)
         visited = set()
```
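Replacing the hard-coded `conv2d_walker` with `PRUNE_WORKER.get(conv_op.type())` makes the walker lookup table-driven: any operator type with a registered worker (now including `conv2d_transpose`) dispatches automatically. A minimal sketch of this registry pattern, using hypothetical names rather than PaddleSlim's actual classes:

```python
# Hypothetical registry keyed by operator-type string.
PRUNE_WORKER = {}

def register(cls):
    """Class decorator that registers a worker under its class name."""
    PRUNE_WORKER[cls.__name__] = cls
    return cls

@register
class conv2d:
    def __init__(self, op, **kwargs):
        self.op = op

@register
class conv2d_transpose:
    def __init__(self, op, **kwargs):
        self.op = op

# Dispatch on the op's type instead of hard-coding one walker class.
op_type = "conv2d_transpose"            # would come from conv_op.type()
walker = PRUNE_WORKER.get(op_type)(op=None)
print(type(walker).__name__)            # conv2d_transpose
```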
paddleslim/prune/prune_walker.py

```diff
@@ -84,9 +84,7 @@ class PruneWorker(object):
             cls = PRUNE_WORKER.get("default_walker")
         _logger.debug("\nfrom: {}\nto: {}\npruned_axis: {}; var: {}".format(
             self.op, op, pruned_axis, var.name()))
-        walker = cls(op,
-                     pruned_params=self.pruned_params,
-                     visited=self.visited)
+        walker = cls(op, pruned_params=self.pruned_params, visited=self.visited)
         walker.prune(var, pruned_axis, pruned_idx)
```

```diff
@@ -175,29 +173,8 @@ class conv2d_transpose(PruneWorker):
             self._prune_op(op, filter_var, 0, pruned_idx)
         elif var in self.op.inputs("Filter"):
             assert pruned_axis in [0, 1]
-            self.pruned_params.append((var, pruned_axis, pruned_idx))
-            for op in var.outputs():
-                self._prune_op(op, var, pruned_axis, pruned_idx)
-            if pruned_axis == 1:
-                if len(self.op.inputs("Bias")) > 0:
-                    self.pruned_params.append(
-                        (self.op.inputs("Bias"), channel_axis, pruned_idx))
-                output_var = self.op.outputs("Output")[0]
-                self._visit(output_var, channel_axis)
-                next_ops = output_var.outputs()
-                for op in next_ops:
-                    self._prune_op(op, output_var, channel_axis, pruned_idx)
-            elif pruned_axis == 0:
-                input_var = self.op.inputs("Input")[0]
-                self._visit(input_var, channel_axis)
-                pre_ops = input_var.inputs()
-                for op in pre_ops:
-                    self._prune_op(op, input_var, channel_axis, pruned_idx)
+            _logger.warn("Skip pruning output channels of conv2d_transpose!")
+            return
         elif var in self.op.outputs("Output"):
             assert pruned_axis == channel_axis, "pruned_axis: {}; var: {}".format(
                 pruned_axis, var.name())
```
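The reason output-channel pruning is skipped here: unlike `conv2d`, whose filter is laid out `(out_C, in_C, kH, kW)`, a `conv2d_transpose` filter in Paddle is laid out `(in_C, out_C/groups, kH, kW)`, so axis 0 of the filter tracks the input channels and the output-channel case cannot be walked the same way. A tiny illustration of the two layouts, shapes only, with hypothetical channel counts:

```python
import numpy as np

# conv2d: 16 output channels, 8 input channels, 3x3 kernel.
conv_w = np.zeros((16, 8, 3, 3))    # (out_C, in_C, kH, kW)

# conv2d_transpose: 8 input channels, 16 output channels, 2x2 kernel,
# matching the (8, 16, 2, 2) shape asserted in tests/test_prune.py below.
deconv_w = np.zeros((8, 16, 2, 2))  # (in_C, out_C, kH, kW)

# Pruning axis 0 therefore means output channels for conv2d
# but input channels for conv2d_transpose.
```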
tests/test_prune.py

```diff
@@ -41,6 +41,9 @@ class TestPrune(unittest.TestCase):
         conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
         conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
+        conv7 = fluid.layers.conv2d_transpose(
+            input=conv6, num_filters=16, filter_size=2, stride=2)
+
         shapes = {}
         for param in main_program.global_block().all_parameters():
             shapes[param.name] = param.shape
```

```diff
@@ -53,8 +56,8 @@ class TestPrune(unittest.TestCase):
         main_program, _, _ = pruner.prune(
             main_program,
             scope,
-            params=["conv4_weights"],
-            ratios=[0.5],
+            params=["conv4_weights", "conv2d_transpose_0.w_0"],
+            ratios=[0.5, 0.6],
             place=place,
             lazy=False,
             only_graph=False,
```

```diff
@@ -67,11 +70,12 @@ class TestPrune(unittest.TestCase):
             "conv3_weights": (8, 4, 3, 3),
             "conv4_weights": (4, 8, 3, 3),
             "conv5_weights": (8, 4, 3, 3),
-            "conv6_weights": (8, 8, 3, 3)
+            "conv6_weights": (8, 8, 3, 3),
+            "conv2d_transpose_0.w_0": (8, 16, 2, 2),
         }
         for param in main_program.global_block().all_parameters():
-            if "weights" in param.name:
+            if param.name in shapes:
                 print("param: {}; param shape: {}".format(param.name, param.shape))
                 self.assertTrue(param.shape == shapes[param.name])
```
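The updated test can be run on its own to exercise the new conv2d_transpose pruning path; assuming the file keeps the usual `unittest.main()` entry point, something like:

```
python tests/test_prune.py
```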