Fix PaddleSlim distillation demo configs (#3505)

* fix some configs
* fix doc

Fix PaddleSlim distillation demo configs (#3505)
* fix some configs
* fix doc
d159a6b3 · Bai Yifan · GitHub · ea8299a9 · d159a6b3 · d159a6b3
5 changed files
--- a/PaddleSlim/classification/distillation/README.md
+++ b/PaddleSlim/classification/distillation/README.md
@@ -139,16 +139,30 @@ strategies:
 | baseline | 70.99%/89.68%     |
 | 蒸馏后     | -                 |
->训练超参：
+#### 训练超参
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 ### MobileNetV2
 | FLOPS    | top1_acc/top5_acc |
 | -------- | ----------------- |
 | baseline | 72.15%/90.65%     |
-| 蒸馏后     | -                 |
+| 蒸馏后     | 70.66%/90.42%                 |
->训练超参：
+#### 训练超参
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 ### ResNet34
@@ -157,6 +171,13 @@ strategies:
 | baseline | 74.57%/92.14%     |
 | 蒸馏后     | -                 |
->训练超参：
+#### 训练超参
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 ## FAQ
--- a/PaddleSlim/classification/distillation/compress.py
+++ b/PaddleSlim/classification/distillation/compress.py
@@ -34,7 +34,6 @@ add_arg('pretrained_model', str,  None,                "Whether to use pretraine
 add_arg('teacher_model',    str,  None,          "Set the teacher network to use.")
 add_arg('teacher_pretrained_model', str,  None,                "Whether to use pretrained model.")
 add_arg('compress_config',  str,  None,                 "The config file for compression with yaml format.")
-add_arg('quant_only',       bool, False,                "Only do quantization-aware training.")
 # yapf: enable
 model_list = [m for m in dir(models) if "__" not in m]
@@ -50,23 +49,9 @@ def compress(args):
    # model definition
    model = models.__dict__[args.model]()
-    if args.model is "GoogleNet":
-        out0, out1, out2 = model.net(input=image, class_dim=args.class_dim)
-        cost0 = fluid.layers.cross_entropy(input=out0, label=label)
-        cost1 = fluid.layers.cross_entropy(input=out1, label=label)
-        cost2 = fluid.layers.cross_entropy(input=out2, label=label)
-        avg_cost0 = fluid.layers.mean(x=cost0)
-        avg_cost1 = fluid.layers.mean(x=cost1)
-        avg_cost2 = fluid.layers.mean(x=cost2)
-        avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
-        acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
-    else:
    if args.model == 'ResNet34':
        model.prefix_name = 'res34'
-            out = model.net(input=image,
+        out = model.net(input=image, class_dim=args.class_dim, fc_name='fc_0')
-                            class_dim=args.class_dim,
-                            fc_name='fc_0')
    else:
        out = model.net(input=image, class_dim=args.class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
@@ -76,14 +61,8 @@ def compress(args):
    #print("="*50+"student_model_params"+"="*50)
    #for v in fluid.default_main_program().list_vars():
    #    print(v.name, v.shape)
    val_program = fluid.default_main_program().clone()
-    if args.quant_only:
-        boundaries = [
-            args.total_images / args.batch_size * 10,
-            args.total_images / args.batch_size * 16
-        ]
-        values = [1e-4, 1e-5, 1e-6]
-    else:
    boundaries = [
        args.total_images / args.batch_size * 30, args.total_images /
        args.batch_size * 60, args.total_images / args.batch_size * 90
@@ -117,7 +96,7 @@ def compress(args):
    teacher_programs = []
    distiller_optimizer = None
-    if args.teacher_model:
    teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
    # define teacher program
    teacher_program = fluid.Program()
@@ -131,6 +110,8 @@ def compress(args):
    #print("="*50+"teacher_model_params"+"="*50)
    #for v in teacher_program.list_vars():
    #    print(v.name, v.shape)
+    #return
    exe.run(startup_program)
    assert args.teacher_pretrained_model and os.path.exists(
        args.teacher_pretrained_model

--- a/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml
+++ b/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml
@@ -3,7 +3,7 @@ distillers:
    fsp_distiller:
        class: 'FSPDistiller'
        teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res3a_branch2a.conv2d.output.1.tmp_0']]
-        student_pairs: [['depthwise_conv2d_1.tmp_0', 'conv2d_3.tmp_0']]
+        student_pairs: [['depthwise_conv2d_1.tmp_0', 'depthwise_conv2d_2.tmp_0']]
        distillation_loss_weight: 1
    l2_distiller:
        class: 'L2Distiller'

--- a/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml
+++ b/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml
@@ -2,8 +2,8 @@ version: 1.0
 distillers:
    fsp_distiller:
        class: 'FSPDistiller'
-        teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2a_branch2c.conv2d.output.1.tmp_0'], ['res50_res3b_branch2a.conv2d.output.1.tmp_0', 'res50_res3b_branch2c.conv2d.output.1.tmp_0']]
+        teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2b_branch2a.conv2d.output.1.tmp_0'], ['res50_res4b_branch2a.conv2d.output.1.tmp_0', 'res50_res4c_branch2a.conv2d.output.1.tmp_0']]
-        student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch2c.conv2d.output.1.tmp_0'], ['res34_res3b_branch2a.conv2d.output.1.tmp_0', 'res34_res3b_branch2c.conv2d.output.1.tmp_0']]
+        student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch1.conv2d.output.1.tmp_0'], ['res34_res4b_branch2a.conv2d.output.1.tmp_0', 'res34_res4c_branch2a.conv2d.output.1.tmp_0']]
        distillation_loss_weight: 1
    l2_distiller:
        class: 'L2Distiller'

--- a/PaddleSlim/classification/distillation/run.sh
+++ b/PaddleSlim/classification/distillation/run.sh
@@ -44,12 +44,6 @@ python -u compress.py \
 > mobilenet_v1.log 2>&1 &
 tailf mobilenet_v1.log
-cd ${pretrain_dir}/ResNet50_pretrained
-for files in $(ls res50_*)
-    do mv $files ${files#*_}
-done
-cd -
 ## for mobilenet_v2 distillation
 #cd ${pretrain_dir}/ResNet50_pretrained
 #for files in $(ls res50_*)
@@ -67,12 +61,6 @@ cd -
 #--compress_config ./configs/mobilenetv2_resnet50_distillation.yaml\
 #> mobilenet_v2.log 2>&1 &
 #tailf mobilenet_v2.log
-#
-#cd ${pretrain_dir}/ResNet50_pretrained
-#for files in $(ls res50_*)
-#    do mv $files ${files#*_}
-#done
-#cd -
 ## for resnet34 distillation
 #cd ${pretrain_dir}/ResNet50_pretrained
@@ -91,9 +79,3 @@ cd -
 #--compress_config ./configs/resnet34_resnet50_distillation.yaml \
 #> resnet34.log 2>&1 &
 #tailf resnet34.log
-#
-#cd ${pretrain_dir}/ResNet50_pretrained
-#for files in $(ls res50_*)
-#    do mv $files ${files#*_}
-#done
-#cd -