PaddlePaddle / PaddleSlim

Commit 1de853ee (unverified)
Authored by Bai Yifan on Jan 21, 2020; committed via GitHub on Jan 21, 2020
Refine distillation demo (#51)
* refine distillation demo
Parent: eb48cb63
Showing 4 changed files with 370 additions and 29 deletions (+370, -29):
- demo/distillation/README.md (+40, -0)
- demo/distillation/distill.py (+35, -28)
- demo/models/__init__.py (+4, -1)
- demo/models/resnet_vd.py (+291, -0)
demo/distillation/README.md (new file, mode 100644)
# Knowledge Distillation Example

This example shows how to train a model with the knowledge distillation API. A model trained with distillation achieves a measurable accuracy gain over a baseline model trained without it.

## API

See the [knowledge distillation API documentation](https://paddlepaddle.github.io/PaddleSlim/api/single_distiller_api/).

### 1. Distillation training configuration

The example uses ResNet50_vd as the teacher model to distill a MobileNet student network.

Default configuration:

```yaml
batch_size: 256
init_lr: 0.1
lr_strategy: piecewise_decay
l2_decay: 3e-5
momentum_rate: 0.9
num_epochs: 120
data: imagenet
```

Training can simply be launched with this default configuration.

### 2. Launch training

Once the ImageNet dataset is set up, start training with:

```shell
CUDA_VISIBLE_DEVICES=0,1,2,3 python distill.py
```

### 3. Results

Compared with the baseline model trained without distillation (Top-1/Top-5: 70.99%/89.68%), the MobileNet model reaches 72.77%/90.68% Top-1/Top-5 accuracy after 120 epochs of distillation training, a gain of +1.78%/+1.00%.

For detailed experimental data, see the [distillation section of the PaddleSlim model zoo](https://paddlepaddle.github.io/PaddleSlim/model_zoo/#13).
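Before the diff itself, a minimal sketch of the single_distiller workflow that distill.py follows may help. The `merge` and `soft_label_loss` calls are the ones visible in the diff below; the one-layer `fc` networks and the rest of the scaffolding here are stand-ins for the real MobileNet student and ResNet50_vd teacher, so this is an illustrative sketch rather than the demo's actual code:

```python
import paddle.fluid as fluid
from paddleslim.dist import merge, soft_label_loss

place = fluid.CPUPlace()  # the demo uses fluid.CUDAPlace(0) when use_gpu is set

# Student program -- a one-layer stand-in for the MobileNet student.
student_program, s_startup = fluid.Program(), fluid.Program()
with fluid.program_guard(student_program, s_startup):
    image = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    s_logits = fluid.layers.fc(input=image, size=1000)
    avg_cost = fluid.layers.mean(
        fluid.layers.softmax_with_cross_entropy(logits=s_logits, label=label))

# Teacher program -- a one-layer stand-in for the ResNet50_vd teacher.
teacher_program, t_startup = fluid.Program(), fluid.Program()
with fluid.program_guard(teacher_program, t_startup):
    t_image = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
    t_logits = fluid.layers.fc(input=t_image, size=1000)

exe = fluid.Executor(place)
exe.run(t_startup)  # the demo then loads ./ResNet50_vd_pretrained at this point

# Graft the teacher graph onto the student program; teacher variables are
# renamed with a "teacher_" prefix (hence "teacher_fc_0.tmp_0" in the diff).
merge(teacher_program, student_program, {'image': 'image'}, place)

with fluid.program_guard(student_program, s_startup):
    distill_loss = soft_label_loss('teacher_' + t_logits.name, s_logits.name,
                                   student_program)
    loss = avg_cost + distill_loss
```

Fetching the logits through `.name` avoids hard-coding `"fc_0.tmp_0"`-style variable names, which depend on how many layers Fluid has already numbered; the demo can hard-code them because its network structure is fixed.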
demo/distillation/distillation_demo.py → demo/distillation/distill.py (renamed)

```diff
@@ -23,7 +23,7 @@ _logger.setLevel(logging.INFO)
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
-add_arg('batch_size', int, 64*4, "Minibatch size.")
+add_arg('batch_size', int, 64, "Minibatch size.")
 add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
 add_arg('total_images', int, 1281167, "Training image number.")
 add_arg('image_shape', str, "3,224,224", "Input image size")
@@ -32,12 +32,12 @@ add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay
 add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.")
 add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.")
 add_arg('num_epochs', int, 120, "The number of total epochs.")
-add_arg('data', str, "cifar10", "Which data to use. 'cifar10' or 'imagenet'")
+add_arg('data', str, "imagenet", "Which data to use. 'cifar10' or 'imagenet'")
 add_arg('log_period', int, 20, "Log period in batches.")
 add_arg('model', str, "MobileNet", "Set the network to use.")
 add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
-add_arg('teacher_model', str, "ResNet50", "Set the teacher network to use.")
-add_arg('teacher_pretrained_model', str, "./ResNet50_pretrained", "Whether to use pretrained model.")
+add_arg('teacher_model', str, "ResNet50_vd", "Set the teacher network to use.")
+add_arg('teacher_pretrained_model', str, "./ResNet50_vd_pretrained", "Whether to use pretrained model.")
 parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
 # yapf: enable
@@ -45,7 +45,12 @@ model_list = [m for m in dir(models) if "__" not in m]

 def piecewise_decay(args):
-    step = int(math.ceil(float(args.total_images) / args.batch_size))
+    if args.use_gpu:
+        devices_num = fluid.core.get_cuda_device_count()
+    else:
+        devices_num = int(os.environ.get('CPU_NUM', 1))
+    step = int(math.ceil(float(args.total_images) / args.batch_size)) * devices_num
     bd = [step * e for e in args.step_epochs]
     lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
     learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
@@ -53,18 +58,23 @@ def piecewise_decay(args):
         learning_rate=learning_rate,
         momentum=args.momentum_rate,
         regularization=fluid.regularizer.L2Decay(args.l2_decay))
-    return optimizer
+    return learning_rate, optimizer

 def cosine_decay(args):
-    step = int(math.ceil(float(args.total_images) / args.batch_size))
+    if cfg.use_gpu:
+        devices_num = fluid.core.get_cuda_device_count()
+    else:
+        devices_num = int(os.environ.get('CPU_NUM', 1))
+    step = int(math.ceil(float(args.total_images) / args.batch_size)) * devices_num
     learning_rate = fluid.layers.cosine_decay(
         learning_rate=args.lr, step_each_epoch=step, epochs=args.num_epochs)
     optimizer = fluid.optimizer.Momentum(
         learning_rate=learning_rate,
         momentum=args.momentum_rate,
         regularization=fluid.regularizer.L2Decay(args.l2_decay))
-    return optimizer
+    return learning_rate, optimizer

 def create_optimizer(args):
@@ -118,9 +128,6 @@ def compress(args):
     avg_cost = fluid.layers.mean(x=cost)
     acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
     acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-    #print("="*50+"student_model_params"+"="*50)
-    #for v in student_program.list_vars():
-    #    print(v.name, v.shape)
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
@@ -145,23 +152,19 @@ def compress(args):
             name='image', shape=image_shape, dtype='float32')
         predict = teacher_model.net(image, class_dim=class_dim)
-    #print("="*50+"teacher_model_params"+"="*50)
-    #for v in teacher_program.list_vars():
-    #    print(v.name, v.shape)
     exe.run(t_startup)
-    _download('http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar', '.')
-    _decompress('./ResNet50_pretrained.tar')
+    if not os.path.exists(args.teacher_pretrained_model):
+        _download('http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar', '.')
+        _decompress('./ResNet50_vd_pretrained.tar')
     assert args.teacher_pretrained_model and os.path.exists(
         args.teacher_pretrained_model
     ), "teacher_pretrained_model should be set when teacher_model is not None."

     def if_exist(var):
         return os.path.exists(
-            os.path.join(args.teacher_pretrained_model, var.name)
-        ) and var.name != 'fc_0.w_0' and var.name != 'fc_0.b_0'
+            os.path.join(args.teacher_pretrained_model, var.name))

     fluid.io.load_vars(
         exe,
@@ -173,9 +176,10 @@ def compress(args):
     merge(teacher_program, student_program, data_name_map, place)
     with fluid.program_guard(student_program, s_startup):
-        l2_loss = l2_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0", student_program)
-        loss = avg_cost + l2_loss
-        opt = create_optimizer(args)
+        distill_loss = soft_label_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0",
+                                       student_program)
+        loss = avg_cost + distill_loss
+        lr, opt = create_optimizer(args)
         opt.minimize(loss)
     exe.run(s_startup)
     build_strategy = fluid.BuildStrategy()
@@ -185,14 +189,17 @@ def compress(args):
     for epoch_id in range(args.num_epochs):
         for step_id, data in enumerate(train_loader):
-            loss_1, loss_2, loss_3 = exe.run(
+            lr_np, loss_1, loss_2, loss_3 = exe.run(
                 parallel_main,
                 feed=data,
-                fetch_list=[loss.name, avg_cost.name, l2_loss.name])
+                fetch_list=[lr.name, loss.name, avg_cost.name, distill_loss.name])
             if step_id % args.log_period == 0:
                 _logger.info(
-                    "train_epoch {} step {} loss {:.6f}, class loss {:.6f}, l2 loss {:.6f}".
-                    format(epoch_id, step_id, loss_1[0], loss_2[0], loss_3[0]))
+                    "train_epoch {} step {} lr {:.6f}, loss {:.6f}, class loss {:.6f}, distill loss {:.6f}".
+                    format(epoch_id, step_id, lr_np[0], loss_1[0], loss_2[0], loss_3[0]))
         val_acc1s = []
         val_acc5s = []
         for step_id, data in enumerate(valid_loader):
```
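Two behavioral changes stand out in this diff: the per-epoch step count is now multiplied by the number of devices, and both decay helpers return the learning-rate variable alongside the optimizer so the current lr can be fetched and logged each step. Below is a small self-contained sketch of the resulting boundary arithmetic, using the demo's defaults (total_images=1281167, batch_size=64, step_epochs=[30, 60, 90]) and assuming 4 GPUs:

```python
import math

# Demo defaults (see the add_arg calls above); devices_num is assumed to be 4,
# matching CUDA_VISIBLE_DEVICES=0,1,2,3 in the README.
total_images = 1281167
batch_size = 64
devices_num = 4
step_epochs = [30, 60, 90]
init_lr = 0.1

# Mirrors the updated piecewise_decay: steps per epoch scaled by device count.
step = int(math.ceil(float(total_images) / batch_size)) * devices_num
bd = [step * e for e in step_epochs]                     # decay boundaries in steps
lr = [init_lr * (0.1 ** i) for i in range(len(bd) + 1)]  # lr value per interval

print(step)  # 80076
print(bd)    # [2402280, 4804560, 7206840]
print(lr)    # [0.1, 0.01, 0.001, 0.0001] (up to float rounding)
```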
demo/models/__init__.py

```diff
 from .mobilenet import MobileNet
 from .resnet import ResNet34, ResNet50
+from .resnet_vd import ResNet50_vd
 from .mobilenet_v2 import MobileNetV2
 from .pvanet import PVANet
-__all__ = ['MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2', 'PVANet']
+__all__ = ['MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2', 'PVANet', 'ResNet50_vd']
```
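With ResNet50_vd exported from the package, both networks can be constructed from the names passed on the command line. A minimal sketch, assuming it runs from the demo/ directory so that `models` imports; the attribute-lookup pattern shown is how PaddleSlim demos typically resolve the --model/--teacher_model strings, not necessarily this file's exact code:

```python
import models

# Look the network constructors up by their command-line names.
student_model = models.__dict__['MobileNet']()     # MobileNet instance
teacher_model = models.__dict__['ResNet50_vd']()   # ResNet(layers=50, is_3x3=True)
```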
demo/models/resnet_vd.py (new file, mode 100644)

```python
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    "ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd",
    "ResNet152_vd", "ResNet200_vd"
]


class ResNet():
    def __init__(self, layers=50, is_3x3=False):
        self.layers = layers
        self.is_3x3 = is_3x3

    def net(self, input, class_dim=1000):
        is_3x3 = self.is_3x3
        layers = self.layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        num_filters = [64, 128, 256, 512]
        if is_3x3 == False:
            conv = self.conv_bn_layer(
                input=input, num_filters=64, filter_size=7, stride=2, act='relu')
        else:
            conv = self.conv_bn_layer(
                input=input,
                num_filters=32,
                filter_size=3,
                stride=2,
                act='relu',
                name='conv1_1')
            conv = self.conv_bn_layer(
                input=conv,
                num_filters=32,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_2')
            conv = self.conv_bn_layer(
                input=conv,
                num_filters=64,
                filter_size=3,
                stride=1,
                act='relu',
                name='conv1_3')

        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        if layers >= 50:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    if layers in [101, 152, 200] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        if_first=block == i == 0,
                        name=conv_name)
        else:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        if_first=block == i == 0,
                        name=conv_name)

        pool = fluid.layers.pool2d(
            input=conv, pool_type='avg', global_pooling=True)
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        out = fluid.layers.fc(
            input=pool,
            size=class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv)))
        return out

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self,
                          input,
                          num_filters,
                          filter_size,
                          stride=1,
                          groups=1,
                          act=None,
                          name=None):
        pool = fluid.layers.pool2d(
            input=input,
            pool_size=2,
            pool_stride=2,
            pool_padding=0,
            pool_type='avg',
            ceil_mode=True)
        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            if if_first:
                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
            else:
                return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name)
        elif if_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, name, if_first):
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=1,
            act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None,
            name=name + "_branch2c")
        short = self.shortcut(
            input,
            num_filters * 4,
            stride,
            if_first=if_first,
            name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')

    def basic_block(self, input, num_filters, stride, name, if_first):
        conv0 = self.conv_bn_layer(
            input=input,
            num_filters=num_filters,
            filter_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            act=None,
            name=name + "_branch2b")
        short = self.shortcut(
            input,
            num_filters,
            stride,
            if_first=if_first,
            name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')


def ResNet18_vd():
    model = ResNet(layers=18, is_3x3=True)
    return model


def ResNet34_vd():
    model = ResNet(layers=34, is_3x3=True)
    return model


def ResNet50_vd():
    model = ResNet(layers=50, is_3x3=True)
    return model


def ResNet101_vd():
    model = ResNet(layers=101, is_3x3=True)
    return model


def ResNet152_vd():
    model = ResNet(layers=152, is_3x3=True)
    return model


def ResNet200_vd():
    model = ResNet(layers=200, is_3x3=True)
    return model
```
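As a usage sketch: the *_vd constructors only fix the depth and enable the deep 3×3 stem (is_3x3=True); the graph itself is built by calling `net` inside a Fluid program. A minimal, hedged example, assuming it runs from the demo/ directory so `models` imports:

```python
import paddle.fluid as fluid
from models import ResNet50_vd

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    # NCHW input; the batch dimension is implicit with fluid.layers.data.
    image = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
    model = ResNet50_vd()                      # ResNet(layers=50, is_3x3=True)
    logits = model.net(image, class_dim=1000)  # final FC output, shape [N, 1000]
```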