Unverified commit d159a6b3, authored by Bai Yifan, committed by GitHub

Fix PaddleSlim distillation demo configs (#3505)

* fix some configs

* fix doc
Parent ea8299a9
@@ -139,16 +139,30 @@ strategies:
| baseline | 70.99%/89.68% |
| after distillation | - |
>Training hyperparameters:
#### Training hyperparameters
- batch size: 256
- lr_strategy: piecewise_decay
- step_epochs: 30, 60, 90
- num_epochs: 120
- l2_decay: 4e-5
- init lr: 0.1
### MobileNetV2
| FLOPS | top1_acc/top5_acc |
| -------- | ----------------- |
| baseline | 72.15%/90.65% |
| after distillation | - |
| after distillation | 70.66%/90.42% |
>Training hyperparameters:
#### Training hyperparameters
- batch size: 256
- lr_strategy: piecewise_decay
- step_epochs: 30, 60, 90
- num_epochs: 120
- l2_decay: 4e-5
- init lr: 0.1
### ResNet34
@@ -157,6 +171,13 @@ strategies:
| baseline | 74.57%/92.14% |
| after distillation | - |
>Training hyperparameters:
#### Training hyperparameters
- batch size: 256
- lr_strategy: piecewise_decay
- step_epochs: 30, 60, 90
- num_epochs: 120
- l2_decay: 4e-5
- init lr: 0.1
## FAQ
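The three hyperparameter blocks above all describe the same piecewise-decay schedule. As a minimal sketch of how those numbers combine under the paddle.fluid 1.x API used elsewhere in this commit (the ImageNet-1k training-set size of 1281167 is an assumption, as is wiring the L2 decay through the optimizer; everything else is taken from the lists above):

```python
import paddle.fluid as fluid

total_images = 1281167   # assumption: ImageNet-1k training-set size
batch_size = 256         # from the hyperparameter lists
step_epochs = [30, 60, 90]

# One boundary per decay step, expressed in mini-batch iterations,
# mirroring the boundaries computation in compress.py below.
boundaries = [int(total_images / batch_size * e) for e in step_epochs]
values = [0.1, 0.01, 0.001, 0.0001]  # init lr 0.1, divided by 10 at each step

opt = fluid.optimizer.Momentum(
    momentum=0.9,
    learning_rate=fluid.layers.piecewise_decay(
        boundaries=boundaries, values=values),
    regularization=fluid.regularizer.L2Decay(4e-5))  # l2_decay: 4e-5
```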
@@ -34,7 +34,6 @@ add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('teacher_model', str, None, "Set the teacher network to use.")
add_arg('teacher_pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('compress_config', str, None, "The config file for compression with yaml format.")
add_arg('quant_only', bool, False, "Only do quantization-aware training.")
# yapf: enable
model_list = [m for m in dir(models) if "__" not in m]
@@ -50,45 +49,25 @@ def compress(args):
# model definition
model = models.__dict__[args.model]()
if args.model is "GoogleNet":
out0, out1, out2 = model.net(input=image, class_dim=args.class_dim)
cost0 = fluid.layers.cross_entropy(input=out0, label=label)
cost1 = fluid.layers.cross_entropy(input=out1, label=label)
cost2 = fluid.layers.cross_entropy(input=out2, label=label)
avg_cost0 = fluid.layers.mean(x=cost0)
avg_cost1 = fluid.layers.mean(x=cost1)
avg_cost2 = fluid.layers.mean(x=cost2)
avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
if args.model == 'ResNet34':
model.prefix_name = 'res34'
out = model.net(input=image, class_dim=args.class_dim, fc_name='fc_0')
else:
if args.model == 'ResNet34':
model.prefix_name = 'res34'
out = model.net(input=image,
class_dim=args.class_dim,
fc_name='fc_0')
else:
out = model.net(input=image, class_dim=args.class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
out = model.net(input=image, class_dim=args.class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
#print("="*50+"student_model_params"+"="*50)
#for v in fluid.default_main_program().list_vars():
# print(v.name, v.shape)
val_program = fluid.default_main_program().clone()
if args.quant_only:
boundaries = [
args.total_images / args.batch_size * 10,
args.total_images / args.batch_size * 16
]
values = [1e-4, 1e-5, 1e-6]
else:
boundaries = [
args.total_images / args.batch_size * 30, args.total_images /
args.batch_size * 60, args.total_images / args.batch_size * 90
]
values = [0.1, 0.01, 0.001, 0.0001]
boundaries = [
args.total_images / args.batch_size * 30, args.total_images /
args.batch_size * 60, args.total_images / args.batch_size * 90
]
values = [0.1, 0.01, 0.001, 0.0001]
opt = fluid.optimizer.Momentum(
momentum=0.9,
learning_rate=fluid.layers.piecewise_decay(
@@ -117,37 +96,39 @@ def compress(args):
teacher_programs = []
distiller_optimizer = None
if args.teacher_model:
teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
# define teacher program
teacher_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(teacher_program, startup_program):
img = teacher_program.global_block()._clone_variable(
image, force_persistable=False)
predict = teacher_model.net(img,
class_dim=args.class_dim,
fc_name='fc_0')
#print("="*50+"teacher_model_params"+"="*50)
#for v in teacher_program.list_vars():
# print(v.name, v.shape)
exe.run(startup_program)
assert args.teacher_pretrained_model and os.path.exists(
args.teacher_pretrained_model
), "teacher_pretrained_model should be set when teacher_model is not None."
def if_exist(var):
return os.path.exists(
os.path.join(args.teacher_pretrained_model, var.name))
teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
# define teacher program
teacher_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(teacher_program, startup_program):
img = teacher_program.global_block()._clone_variable(
image, force_persistable=False)
predict = teacher_model.net(img,
class_dim=args.class_dim,
fc_name='fc_0')
#print("="*50+"teacher_model_params"+"="*50)
#for v in teacher_program.list_vars():
# print(v.name, v.shape)
#return
exe.run(startup_program)
assert args.teacher_pretrained_model and os.path.exists(
args.teacher_pretrained_model
), "teacher_pretrained_model should be set when teacher_model is not None."
def if_exist(var):
return os.path.exists(
os.path.join(args.teacher_pretrained_model, var.name))
fluid.io.load_vars(
exe,
args.teacher_pretrained_model,
main_program=teacher_program,
predicate=if_exist)
fluid.io.load_vars(
exe,
args.teacher_pretrained_model,
main_program=teacher_program,
predicate=if_exist)
distiller_optimizer = opt
teacher_programs.append(teacher_program.clone(for_test=True))
distiller_optimizer = opt
teacher_programs.append(teacher_program.clone(for_test=True))
com_pass = Compressor(
place,
......
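An aside on the pattern in the hunk above: the teacher network is built inside its own `fluid.Program`, so its variables never collide with the student graph, and its pretrained weights are loaded variable-by-variable with a predicate. A stripped-down sketch under the same fluid 1.x API (the input shape, the `fc` stand-in for the real teacher network, and the checkpoint path are placeholders, not values from this repo):

```python
import os
import paddle.fluid as fluid

teacher_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(teacher_program, startup_program):
    # Build the teacher's forward graph inside its own program.
    img = fluid.layers.data(name='image', shape=[3, 224, 224], dtype='float32')
    out = fluid.layers.fc(input=img, size=1000)  # placeholder for teacher_model.net

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_program)

pretrained_dir = './ResNet50_pretrained'  # placeholder path

def if_exist(var):
    # Load a variable only when a weight file of the same name exists,
    # silently skipping everything without a pretrained counterpart.
    return os.path.exists(os.path.join(pretrained_dir, var.name))

fluid.io.load_vars(exe, pretrained_dir,
                   main_program=teacher_program, predicate=if_exist)
```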
@@ -3,7 +3,7 @@ distillers:
fsp_distiller:
class: 'FSPDistiller'
teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res3a_branch2a.conv2d.output.1.tmp_0']]
student_pairs: [['depthwise_conv2d_1.tmp_0', 'conv2d_3.tmp_0']]
student_pairs: [['depthwise_conv2d_1.tmp_0', 'depthwise_conv2d_2.tmp_0']]
distillation_loss_weight: 1
l2_distiller:
class: 'L2Distiller'
......
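The pairs in this config drive an FSP ("flow of solution procedure") loss: for each pair, a Gram-style matrix is computed between the two feature maps, and the teacher's and student's matrices are pushed together. The swap of `conv2d_3.tmp_0` for `depthwise_conv2d_2.tmp_0` is consistent with FSP's requirement that the two maps in a pair share spatial resolution (and that the teacher and student matrices end up comparable). A numpy illustration of the matrix itself, assuming nothing about PaddleSlim's internals:

```python
import numpy as np

def fsp_matrix(feat_a, feat_b):
    """feat_a: (N, C1, H, W); feat_b: (N, C2, H, W) -- same H and W."""
    n, c1, h, w = feat_a.shape
    c2 = feat_b.shape[1]
    a = feat_a.reshape(n, c1, h * w)
    b = feat_b.reshape(n, c2, h * w)
    # Gram-like (N, C1, C2) matrix, averaged over spatial positions.
    return np.matmul(a, b.transpose(0, 2, 1)) / (h * w)

# The distillation loss is then the mean squared difference between the
# teacher's and the student's FSP matrices for each configured pair.
```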
@@ -2,8 +2,8 @@ version: 1.0
distillers:
fsp_distiller:
class: 'FSPDistiller'
teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2a_branch2c.conv2d.output.1.tmp_0'], ['res50_res3b_branch2a.conv2d.output.1.tmp_0', 'res50_res3b_branch2c.conv2d.output.1.tmp_0']]
student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch2c.conv2d.output.1.tmp_0'], ['res34_res3b_branch2a.conv2d.output.1.tmp_0', 'res34_res3b_branch2c.conv2d.output.1.tmp_0']]
teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2b_branch2a.conv2d.output.1.tmp_0'], ['res50_res4b_branch2a.conv2d.output.1.tmp_0', 'res50_res4c_branch2a.conv2d.output.1.tmp_0']]
student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch1.conv2d.output.1.tmp_0'], ['res34_res4b_branch2a.conv2d.output.1.tmp_0', 'res34_res4c_branch2a.conv2d.output.1.tmp_0']]
distillation_loss_weight: 1
l2_distiller:
class: 'L2Distiller'
......
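Both configs also chain an `L2Distiller` after the FSP loss; conceptually it is just a mean-squared difference between one teacher tensor and the corresponding student tensor. A one-line numpy rendering of that idea (not PaddleSlim's code):

```python
import numpy as np

def l2_distill_loss(teacher_out, student_out):
    # Plain MSE between matching teacher/student tensors.
    return np.mean(np.square(teacher_out - student_out))
```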
@@ -44,12 +44,6 @@ python -u compress.py \
> mobilenet_v1.log 2>&1 &
tailf mobilenet_v1.log
cd ${pretrain_dir}/ResNet50_pretrained
for files in $(ls res50_*)
do mv $files ${files#*_}
done
cd -
## for mobilenet_v2 distillation
#cd ${pretrain_dir}/ResNet50_pretrained
#for files in $(ls res50_*)
@@ -67,12 +61,6 @@ cd -
#--compress_config ./configs/mobilenetv2_resnet50_distillation.yaml\
#> mobilenet_v2.log 2>&1 &
#tailf mobilenet_v2.log
#
#cd ${pretrain_dir}/ResNet50_pretrained
#for files in $(ls res50_*)
# do mv $files ${files#*_}
#done
#cd -
## for resnet34 distillation
#cd ${pretrain_dir}/ResNet50_pretrained
@@ -91,9 +79,3 @@ cd -
#--compress_config ./configs/resnet34_resnet50_distillation.yaml \
#> resnet34.log 2>&1 &
#tailf resnet34.log
#
#cd ${pretrain_dir}/ResNet50_pretrained
#for files in $(ls res50_*)
# do mv $files ${files#*_}
#done
#cd -