From d159a6b389638b4e8dcafd1e115e163590516e46 Mon Sep 17 00:00:00 2001 From: Bai Yifan Date: Fri, 11 Oct 2019 13:07:59 +0800 Subject: [PATCH] Fix PaddleSlim distillation demo configs (#3505) * fix some configs * fix doc --- .../classification/distillation/README.md | 29 ++++- .../classification/distillation/compress.py | 107 +++++++----------- .../mobilenetv1_resnet50_distillation.yaml | 2 +- .../resnet34_resnet50_distillation.yaml | 4 +- PaddleSlim/classification/distillation/run.sh | 18 --- 5 files changed, 72 insertions(+), 88 deletions(-) diff --git a/PaddleSlim/classification/distillation/README.md b/PaddleSlim/classification/distillation/README.md index a9ee6f3d..13b83a3b 100755 --- a/PaddleSlim/classification/distillation/README.md +++ b/PaddleSlim/classification/distillation/README.md @@ -139,16 +139,30 @@ strategies: | baseline | 70.99%/89.68% | | 蒸馏后 | - | ->训练超参: +#### 训练超参 + +- batch size: 256 +- lr_strategy: piecewise_decay +- step_epochs: 30, 60, 90 +- num_epochs: 120 +- l2_decay: 4e-5 +- init lr: 0.1 ### MobileNetV2 | FLOPS | top1_acc/top5_acc | | -------- | ----------------- | | baseline | 72.15%/90.65% | -| 蒸馏后 | - | +| 蒸馏后 | 70.66%/90.42% | ->训练超参: +#### 训练超参 + +- batch size: 256 +- lr_strategy: piecewise_decay +- step_epochs: 30, 60, 90 +- num_epochs: 120 +- l2_decay: 4e-5 +- init lr: 0.1 ### ResNet34 @@ -157,6 +171,13 @@ strategies: | baseline | 74.57%/92.14% | | 蒸馏后 | - | ->训练超参: +#### 训练超参 + +- batch size: 256 +- lr_strategy: piecewise_decay +- step_epochs: 30, 60, 90 +- num_epochs: 120 +- l2_decay: 4e-5 +- init lr: 0.1 ## FAQ diff --git a/PaddleSlim/classification/distillation/compress.py b/PaddleSlim/classification/distillation/compress.py index 8c6ac9ae..49a800f8 100644 --- a/PaddleSlim/classification/distillation/compress.py +++ b/PaddleSlim/classification/distillation/compress.py @@ -34,7 +34,6 @@ add_arg('pretrained_model', str, None, "Whether to use pretraine add_arg('teacher_model', str, None, "Set the teacher network to use.") add_arg('teacher_pretrained_model', str, None, "Whether to use pretrained model.") add_arg('compress_config', str, None, "The config file for compression with yaml format.") -add_arg('quant_only', bool, False, "Only do quantization-aware training.") # yapf: enable model_list = [m for m in dir(models) if "__" not in m] @@ -50,45 +49,25 @@ def compress(args): # model definition model = models.__dict__[args.model]() - if args.model is "GoogleNet": - out0, out1, out2 = model.net(input=image, class_dim=args.class_dim) - cost0 = fluid.layers.cross_entropy(input=out0, label=label) - cost1 = fluid.layers.cross_entropy(input=out1, label=label) - cost2 = fluid.layers.cross_entropy(input=out2, label=label) - avg_cost0 = fluid.layers.mean(x=cost0) - avg_cost1 = fluid.layers.mean(x=cost1) - avg_cost2 = fluid.layers.mean(x=cost2) - avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2 - acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1) - acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5) + if args.model == 'ResNet34': + model.prefix_name = 'res34' + out = model.net(input=image, class_dim=args.class_dim, fc_name='fc_0') else: - if args.model == 'ResNet34': - model.prefix_name = 'res34' - out = model.net(input=image, - class_dim=args.class_dim, - fc_name='fc_0') - else: - out = model.net(input=image, class_dim=args.class_dim) - cost = fluid.layers.cross_entropy(input=out, label=label) - avg_cost = fluid.layers.mean(x=cost) - acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) + out = model.net(input=image, class_dim=args.class_dim) + cost = fluid.layers.cross_entropy(input=out, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) #print("="*50+"student_model_params"+"="*50) #for v in fluid.default_main_program().list_vars(): # print(v.name, v.shape) + val_program = fluid.default_main_program().clone() - if args.quant_only: - boundaries = [ - args.total_images / args.batch_size * 10, - args.total_images / args.batch_size * 16 - ] - values = [1e-4, 1e-5, 1e-6] - else: - boundaries = [ - args.total_images / args.batch_size * 30, args.total_images / - args.batch_size * 60, args.total_images / args.batch_size * 90 - ] - values = [0.1, 0.01, 0.001, 0.0001] + boundaries = [ + args.total_images / args.batch_size * 30, args.total_images / + args.batch_size * 60, args.total_images / args.batch_size * 90 + ] + values = [0.1, 0.01, 0.001, 0.0001] opt = fluid.optimizer.Momentum( momentum=0.9, learning_rate=fluid.layers.piecewise_decay( @@ -117,37 +96,39 @@ def compress(args): teacher_programs = [] distiller_optimizer = None - if args.teacher_model: - teacher_model = models.__dict__[args.teacher_model](prefix_name='res50') - # define teacher program - teacher_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(teacher_program, startup_program): - img = teacher_program.global_block()._clone_variable( - image, force_persistable=False) - predict = teacher_model.net(img, - class_dim=args.class_dim, - fc_name='fc_0') - #print("="*50+"teacher_model_params"+"="*50) - #for v in teacher_program.list_vars(): - # print(v.name, v.shape) - exe.run(startup_program) - assert args.teacher_pretrained_model and os.path.exists( - args.teacher_pretrained_model - ), "teacher_pretrained_model should be set when teacher_model is not None." - def if_exist(var): - return os.path.exists( - os.path.join(args.teacher_pretrained_model, var.name)) + teacher_model = models.__dict__[args.teacher_model](prefix_name='res50') + # define teacher program + teacher_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(teacher_program, startup_program): + img = teacher_program.global_block()._clone_variable( + image, force_persistable=False) + predict = teacher_model.net(img, + class_dim=args.class_dim, + fc_name='fc_0') + #print("="*50+"teacher_model_params"+"="*50) + #for v in teacher_program.list_vars(): + # print(v.name, v.shape) + #return + + exe.run(startup_program) + assert args.teacher_pretrained_model and os.path.exists( + args.teacher_pretrained_model + ), "teacher_pretrained_model should be set when teacher_model is not None." + + def if_exist(var): + return os.path.exists( + os.path.join(args.teacher_pretrained_model, var.name)) - fluid.io.load_vars( - exe, - args.teacher_pretrained_model, - main_program=teacher_program, - predicate=if_exist) + fluid.io.load_vars( + exe, + args.teacher_pretrained_model, + main_program=teacher_program, + predicate=if_exist) - distiller_optimizer = opt - teacher_programs.append(teacher_program.clone(for_test=True)) + distiller_optimizer = opt + teacher_programs.append(teacher_program.clone(for_test=True)) com_pass = Compressor( place, diff --git a/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml b/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml index 8ade55df..03c0d6ca 100644 --- a/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml +++ b/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml @@ -3,7 +3,7 @@ distillers: fsp_distiller: class: 'FSPDistiller' teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res3a_branch2a.conv2d.output.1.tmp_0']] - student_pairs: [['depthwise_conv2d_1.tmp_0', 'conv2d_3.tmp_0']] + student_pairs: [['depthwise_conv2d_1.tmp_0', 'depthwise_conv2d_2.tmp_0']] distillation_loss_weight: 1 l2_distiller: class: 'L2Distiller' diff --git a/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml b/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml index 5013140b..4d4546e0 100644 --- a/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml +++ b/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml @@ -2,8 +2,8 @@ version: 1.0 distillers: fsp_distiller: class: 'FSPDistiller' - teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2a_branch2c.conv2d.output.1.tmp_0'], ['res50_res3b_branch2a.conv2d.output.1.tmp_0', 'res50_res3b_branch2c.conv2d.output.1.tmp_0']] - student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch2c.conv2d.output.1.tmp_0'], ['res34_res3b_branch2a.conv2d.output.1.tmp_0', 'res34_res3b_branch2c.conv2d.output.1.tmp_0']] + teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2b_branch2a.conv2d.output.1.tmp_0'], ['res50_res4b_branch2a.conv2d.output.1.tmp_0', 'res50_res4c_branch2a.conv2d.output.1.tmp_0']] + student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch1.conv2d.output.1.tmp_0'], ['res34_res4b_branch2a.conv2d.output.1.tmp_0', 'res34_res4c_branch2a.conv2d.output.1.tmp_0']] distillation_loss_weight: 1 l2_distiller: class: 'L2Distiller' diff --git a/PaddleSlim/classification/distillation/run.sh b/PaddleSlim/classification/distillation/run.sh index e9d88f50..c18e6eee 100644 --- a/PaddleSlim/classification/distillation/run.sh +++ b/PaddleSlim/classification/distillation/run.sh @@ -44,12 +44,6 @@ python -u compress.py \ > mobilenet_v1.log 2>&1 & tailf mobilenet_v1.log -cd ${pretrain_dir}/ResNet50_pretrained -for files in $(ls res50_*) - do mv $files ${files#*_} -done -cd - - ## for mobilenet_v2 distillation #cd ${pretrain_dir}/ResNet50_pretrained #for files in $(ls res50_*) @@ -67,12 +61,6 @@ cd - #--compress_config ./configs/mobilenetv2_resnet50_distillation.yaml\ #> mobilenet_v2.log 2>&1 & #tailf mobilenet_v2.log -# -#cd ${pretrain_dir}/ResNet50_pretrained -#for files in $(ls res50_*) -# do mv $files ${files#*_} -#done -#cd - ## for resnet34 distillation #cd ${pretrain_dir}/ResNet50_pretrained @@ -91,9 +79,3 @@ cd - #--compress_config ./configs/resnet34_resnet50_distillation.yaml \ #> resnet34.log 2>&1 & #tailf resnet34.log -# -#cd ${pretrain_dir}/ResNet50_pretrained -#for files in $(ls res50_*) -# do mv $files ${files#*_} -#done -#cd - -- GitLab