未验证 提交 d159a6b3 作者: Bai Yifan 提交者: GitHub

Fix PaddleSlim distillation demo configs (#3505)

* fix some configs

* fix doc
上级 ea8299a9
...@@ -139,16 +139,30 @@ strategies: ...@@ -139,16 +139,30 @@ strategies:
| baseline | 70.99%/89.68% | | baseline | 70.99%/89.68% |
| 蒸馏后 | - | | 蒸馏后 | - |
>训练超参: #### 训练超参
- batch size: 256
- lr_strategy: piecewise_decay
- step_epochs: 30, 60, 90
- num_epochs: 120
- l2_decay: 4e-5
- init lr: 0.1
### MobileNetV2 ### MobileNetV2
| FLOPS | top1_acc/top5_acc | | FLOPS | top1_acc/top5_acc |
| -------- | ----------------- | | -------- | ----------------- |
| baseline | 72.15%/90.65% | | baseline | 72.15%/90.65% |
| 蒸馏后 | - | | 蒸馏后 | 70.66%/90.42% |
>训练超参: #### 训练超参
- batch size: 256
- lr_strategy: piecewise_decay
- step_epochs: 30, 60, 90
- num_epochs: 120
- l2_decay: 4e-5
- init lr: 0.1
### ResNet34 ### ResNet34
...@@ -157,6 +171,13 @@ strategies: ...@@ -157,6 +171,13 @@ strategies:
| baseline | 74.57%/92.14% | | baseline | 74.57%/92.14% |
| 蒸馏后 | - | | 蒸馏后 | - |
>训练超参: #### 训练超参
- batch size: 256
- lr_strategy: piecewise_decay
- step_epochs: 30, 60, 90
- num_epochs: 120
- l2_decay: 4e-5
- init lr: 0.1
## FAQ ## FAQ
...@@ -34,7 +34,6 @@ add_arg('pretrained_model', str, None, "Whether to use pretraine ...@@ -34,7 +34,6 @@ add_arg('pretrained_model', str, None, "Whether to use pretraine
add_arg('teacher_model', str, None, "Set the teacher network to use.") add_arg('teacher_model', str, None, "Set the teacher network to use.")
add_arg('teacher_pretrained_model', str, None, "Whether to use pretrained model.") add_arg('teacher_pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('compress_config', str, None, "The config file for compression with yaml format.") add_arg('compress_config', str, None, "The config file for compression with yaml format.")
add_arg('quant_only', bool, False, "Only do quantization-aware training.")
# yapf: enable # yapf: enable
model_list = [m for m in dir(models) if "__" not in m] model_list = [m for m in dir(models) if "__" not in m]
...@@ -50,45 +49,25 @@ def compress(args): ...@@ -50,45 +49,25 @@ def compress(args):
# model definition # model definition
model = models.__dict__[args.model]() model = models.__dict__[args.model]()
if args.model is "GoogleNet": if args.model == 'ResNet34':
out0, out1, out2 = model.net(input=image, class_dim=args.class_dim) model.prefix_name = 'res34'
cost0 = fluid.layers.cross_entropy(input=out0, label=label) out = model.net(input=image, class_dim=args.class_dim, fc_name='fc_0')
cost1 = fluid.layers.cross_entropy(input=out1, label=label)
cost2 = fluid.layers.cross_entropy(input=out2, label=label)
avg_cost0 = fluid.layers.mean(x=cost0)
avg_cost1 = fluid.layers.mean(x=cost1)
avg_cost2 = fluid.layers.mean(x=cost2)
avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
else: else:
if args.model == 'ResNet34': out = model.net(input=image, class_dim=args.class_dim)
model.prefix_name = 'res34' cost = fluid.layers.cross_entropy(input=out, label=label)
out = model.net(input=image, avg_cost = fluid.layers.mean(x=cost)
class_dim=args.class_dim, acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
fc_name='fc_0') acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
else:
out = model.net(input=image, class_dim=args.class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
#print("="*50+"student_model_params"+"="*50) #print("="*50+"student_model_params"+"="*50)
#for v in fluid.default_main_program().list_vars(): #for v in fluid.default_main_program().list_vars():
# print(v.name, v.shape) # print(v.name, v.shape)
val_program = fluid.default_main_program().clone() val_program = fluid.default_main_program().clone()
if args.quant_only: boundaries = [
boundaries = [ args.total_images / args.batch_size * 30, args.total_images /
args.total_images / args.batch_size * 10, args.batch_size * 60, args.total_images / args.batch_size * 90
args.total_images / args.batch_size * 16 ]
] values = [0.1, 0.01, 0.001, 0.0001]
values = [1e-4, 1e-5, 1e-6]
else:
boundaries = [
args.total_images / args.batch_size * 30, args.total_images /
args.batch_size * 60, args.total_images / args.batch_size * 90
]
values = [0.1, 0.01, 0.001, 0.0001]
opt = fluid.optimizer.Momentum( opt = fluid.optimizer.Momentum(
momentum=0.9, momentum=0.9,
learning_rate=fluid.layers.piecewise_decay( learning_rate=fluid.layers.piecewise_decay(
...@@ -117,37 +96,39 @@ def compress(args): ...@@ -117,37 +96,39 @@ def compress(args):
teacher_programs = [] teacher_programs = []
distiller_optimizer = None distiller_optimizer = None
if args.teacher_model:
teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
# define teacher program
teacher_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(teacher_program, startup_program):
img = teacher_program.global_block()._clone_variable(
image, force_persistable=False)
predict = teacher_model.net(img,
class_dim=args.class_dim,
fc_name='fc_0')
#print("="*50+"teacher_model_params"+"="*50)
#for v in teacher_program.list_vars():
# print(v.name, v.shape)
exe.run(startup_program)
assert args.teacher_pretrained_model and os.path.exists(
args.teacher_pretrained_model
), "teacher_pretrained_model should be set when teacher_model is not None."
def if_exist(var): teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
return os.path.exists( # define teacher program
os.path.join(args.teacher_pretrained_model, var.name)) teacher_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(teacher_program, startup_program):
img = teacher_program.global_block()._clone_variable(
image, force_persistable=False)
predict = teacher_model.net(img,
class_dim=args.class_dim,
fc_name='fc_0')
#print("="*50+"teacher_model_params"+"="*50)
#for v in teacher_program.list_vars():
# print(v.name, v.shape)
#return
exe.run(startup_program)
assert args.teacher_pretrained_model and os.path.exists(
args.teacher_pretrained_model
), "teacher_pretrained_model should be set when teacher_model is not None."
def if_exist(var):
return os.path.exists(
os.path.join(args.teacher_pretrained_model, var.name))
fluid.io.load_vars( fluid.io.load_vars(
exe, exe,
args.teacher_pretrained_model, args.teacher_pretrained_model,
main_program=teacher_program, main_program=teacher_program,
predicate=if_exist) predicate=if_exist)
distiller_optimizer = opt distiller_optimizer = opt
teacher_programs.append(teacher_program.clone(for_test=True)) teacher_programs.append(teacher_program.clone(for_test=True))
com_pass = Compressor( com_pass = Compressor(
place, place,
......
...@@ -3,7 +3,7 @@ distillers: ...@@ -3,7 +3,7 @@ distillers:
fsp_distiller: fsp_distiller:
class: 'FSPDistiller' class: 'FSPDistiller'
teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res3a_branch2a.conv2d.output.1.tmp_0']] teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res3a_branch2a.conv2d.output.1.tmp_0']]
student_pairs: [['depthwise_conv2d_1.tmp_0', 'conv2d_3.tmp_0']] student_pairs: [['depthwise_conv2d_1.tmp_0', 'depthwise_conv2d_2.tmp_0']]
distillation_loss_weight: 1 distillation_loss_weight: 1
l2_distiller: l2_distiller:
class: 'L2Distiller' class: 'L2Distiller'
......
...@@ -2,8 +2,8 @@ version: 1.0 ...@@ -2,8 +2,8 @@ version: 1.0
distillers: distillers:
fsp_distiller: fsp_distiller:
class: 'FSPDistiller' class: 'FSPDistiller'
teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2a_branch2c.conv2d.output.1.tmp_0'], ['res50_res3b_branch2a.conv2d.output.1.tmp_0', 'res50_res3b_branch2c.conv2d.output.1.tmp_0']] teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2b_branch2a.conv2d.output.1.tmp_0'], ['res50_res4b_branch2a.conv2d.output.1.tmp_0', 'res50_res4c_branch2a.conv2d.output.1.tmp_0']]
student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch2c.conv2d.output.1.tmp_0'], ['res34_res3b_branch2a.conv2d.output.1.tmp_0', 'res34_res3b_branch2c.conv2d.output.1.tmp_0']] student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch1.conv2d.output.1.tmp_0'], ['res34_res4b_branch2a.conv2d.output.1.tmp_0', 'res34_res4c_branch2a.conv2d.output.1.tmp_0']]
distillation_loss_weight: 1 distillation_loss_weight: 1
l2_distiller: l2_distiller:
class: 'L2Distiller' class: 'L2Distiller'
......
...@@ -44,12 +44,6 @@ python -u compress.py \ ...@@ -44,12 +44,6 @@ python -u compress.py \
> mobilenet_v1.log 2>&1 & > mobilenet_v1.log 2>&1 &
tailf mobilenet_v1.log tailf mobilenet_v1.log
cd ${pretrain_dir}/ResNet50_pretrained
for files in $(ls res50_*)
do mv $files ${files#*_}
done
cd -
## for mobilenet_v2 distillation ## for mobilenet_v2 distillation
#cd ${pretrain_dir}/ResNet50_pretrained #cd ${pretrain_dir}/ResNet50_pretrained
#for files in $(ls res50_*) #for files in $(ls res50_*)
...@@ -67,12 +61,6 @@ cd - ...@@ -67,12 +61,6 @@ cd -
#--compress_config ./configs/mobilenetv2_resnet50_distillation.yaml\ #--compress_config ./configs/mobilenetv2_resnet50_distillation.yaml\
#> mobilenet_v2.log 2>&1 & #> mobilenet_v2.log 2>&1 &
#tailf mobilenet_v2.log #tailf mobilenet_v2.log
#
#cd ${pretrain_dir}/ResNet50_pretrained
#for files in $(ls res50_*)
# do mv $files ${files#*_}
#done
#cd -
## for resnet34 distillation ## for resnet34 distillation
#cd ${pretrain_dir}/ResNet50_pretrained #cd ${pretrain_dir}/ResNet50_pretrained
...@@ -91,9 +79,3 @@ cd - ...@@ -91,9 +79,3 @@ cd -
#--compress_config ./configs/resnet34_resnet50_distillation.yaml \ #--compress_config ./configs/resnet34_resnet50_distillation.yaml \
#> resnet34.log 2>&1 & #> resnet34.log 2>&1 &
#tailf resnet34.log #tailf resnet34.log
#
#cd ${pretrain_dir}/ResNet50_pretrained
#for files in $(ls res50_*)
# do mv $files ${files#*_}
#done
#cd -
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册