Fix PaddleSlim distillation demo configs (#3505)

* fix some configs
* fix doc

Fix PaddleSlim distillation demo configs (#3505)
* fix some configs
* fix doc
d159a6b3 · Bai Yifan · GitHub · ea8299a9 · d159a6b3 · d159a6b3
5 changed files
--- a/PaddleSlim/classification/distillation/README.md
+++ b/PaddleSlim/classification/distillation/README.md
@@ -139,16 +139,30 @@ strategies:
 | baseline | 70.99%/89.68%     |
 | 蒸馏后     | -                 |
->训练超参：
+#### 训练超参
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 ### MobileNetV2
 | FLOPS    | top1_acc/top5_acc |
 | -------- | ----------------- |
 | baseline | 72.15%/90.65%     |
-| 蒸馏后     | -                 |
+| 蒸馏后     | 70.66%/90.42%                 |
->训练超参：
+#### 训练超参
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 ### ResNet34
@@ -157,6 +171,13 @@ strategies:
 | baseline | 74.57%/92.14%     |
 | 蒸馏后     | -                 |
->训练超参：
+#### 训练超参
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 ## FAQ
--- a/PaddleSlim/classification/distillation/compress.py
+++ b/PaddleSlim/classification/distillation/compress.py
@@ -34,7 +34,6 @@ add_arg('pretrained_model', str,  None,                "Whether to use pretraine
 add_arg('teacher_model',    str,  None,          "Set the teacher network to use.")
 add_arg('teacher_pretrained_model', str,  None,                "Whether to use pretrained model.")
 add_arg('compress_config',  str,  None,                 "The config file for compression with yaml format.")
-add_arg('quant_only',       bool, False,                "Only do quantization-aware training.")
 # yapf: enable
 model_list = [m for m in dir(models) if "__" not in m]
@@ -50,45 +49,25 @@ def compress(args):
    # model definition
    model = models.__dict__[args.model]()
-    if args.model is "GoogleNet":
+    if args.model == 'ResNet34':
-        out0, out1, out2 = model.net(input=image, class_dim=args.class_dim)
+        model.prefix_name = 'res34'
-        cost0 = fluid.layers.cross_entropy(input=out0, label=label)
+        out = model.net(input=image, class_dim=args.class_dim, fc_name='fc_0')
-        cost1 = fluid.layers.cross_entropy(input=out1, label=label)
-        cost2 = fluid.layers.cross_entropy(input=out2, label=label)
-        avg_cost0 = fluid.layers.mean(x=cost0)
-        avg_cost1 = fluid.layers.mean(x=cost1)
-        avg_cost2 = fluid.layers.mean(x=cost2)
-        avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
-        acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
    else:
-        if args.model == 'ResNet34':
+        out = model.net(input=image, class_dim=args.class_dim)
-            model.prefix_name = 'res34'
+    cost = fluid.layers.cross_entropy(input=out, label=label)
-            out = model.net(input=image,
+    avg_cost = fluid.layers.mean(x=cost)
-                            class_dim=args.class_dim,
+    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-                            fc_name='fc_0')
+    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-        else:
-            out = model.net(input=image, class_dim=args.class_dim)
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    #print("="*50+"student_model_params"+"="*50)
    #for v in fluid.default_main_program().list_vars():
    #    print(v.name, v.shape)
    val_program = fluid.default_main_program().clone()
-    if args.quant_only:
+    boundaries = [
-        boundaries = [
+        args.total_images / args.batch_size * 30, args.total_images /
-            args.total_images / args.batch_size * 10,
+        args.batch_size * 60, args.total_images / args.batch_size * 90
-            args.total_images / args.batch_size * 16
+    ]
-        ]
+    values = [0.1, 0.01, 0.001, 0.0001]
-        values = [1e-4, 1e-5, 1e-6]
-    else:
-        boundaries = [
-            args.total_images / args.batch_size * 30, args.total_images /
-            args.batch_size * 60, args.total_images / args.batch_size * 90
-        ]
-        values = [0.1, 0.01, 0.001, 0.0001]
    opt = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=fluid.layers.piecewise_decay(
@@ -117,37 +96,39 @@ def compress(args):
    teacher_programs = []
    distiller_optimizer = None
-    if args.teacher_model:
-        teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
-        # define teacher program
-        teacher_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(teacher_program, startup_program):
-            img = teacher_program.global_block()._clone_variable(
-                image, force_persistable=False)
-            predict = teacher_model.net(img,
-                                        class_dim=args.class_dim,
-                                        fc_name='fc_0')
-        #print("="*50+"teacher_model_params"+"="*50)
-        #for v in teacher_program.list_vars():
-        #    print(v.name, v.shape)
-        exe.run(startup_program)
-        assert args.teacher_pretrained_model and os.path.exists(
-            args.teacher_pretrained_model
-        ), "teacher_pretrained_model should be set when teacher_model is not None."
-        def if_exist(var):
+    teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
-            return os.path.exists(
+    # define teacher program
-                os.path.join(args.teacher_pretrained_model, var.name))
+    teacher_program = fluid.Program()
+    startup_program = fluid.Program()
+    with fluid.program_guard(teacher_program, startup_program):
+        img = teacher_program.global_block()._clone_variable(
+            image, force_persistable=False)
+        predict = teacher_model.net(img,
+                                    class_dim=args.class_dim,
+                                    fc_name='fc_0')
+    #print("="*50+"teacher_model_params"+"="*50)
+    #for v in teacher_program.list_vars():
+    #    print(v.name, v.shape)
+    #return
+    exe.run(startup_program)
+    assert args.teacher_pretrained_model and os.path.exists(
+        args.teacher_pretrained_model
+    ), "teacher_pretrained_model should be set when teacher_model is not None."
+    def if_exist(var):
+        return os.path.exists(
+            os.path.join(args.teacher_pretrained_model, var.name))
-        fluid.io.load_vars(
+    fluid.io.load_vars(
-            exe,
+        exe,
-            args.teacher_pretrained_model,
+        args.teacher_pretrained_model,
-            main_program=teacher_program,
+        main_program=teacher_program,
-            predicate=if_exist)
+        predicate=if_exist)
-        distiller_optimizer = opt
+    distiller_optimizer = opt
-        teacher_programs.append(teacher_program.clone(for_test=True))
+    teacher_programs.append(teacher_program.clone(for_test=True))
    com_pass = Compressor(
        place,

--- a/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml
+++ b/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml
@@ -3,7 +3,7 @@ distillers:
    fsp_distiller:
        class: 'FSPDistiller'
        teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res3a_branch2a.conv2d.output.1.tmp_0']]
-        student_pairs: [['depthwise_conv2d_1.tmp_0', 'conv2d_3.tmp_0']]
+        student_pairs: [['depthwise_conv2d_1.tmp_0', 'depthwise_conv2d_2.tmp_0']]
        distillation_loss_weight: 1
    l2_distiller:
        class: 'L2Distiller'

--- a/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml
+++ b/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml
@@ -2,8 +2,8 @@ version: 1.0
 distillers:
    fsp_distiller:
        class: 'FSPDistiller'
-        teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2a_branch2c.conv2d.output.1.tmp_0'], ['res50_res3b_branch2a.conv2d.output.1.tmp_0', 'res50_res3b_branch2c.conv2d.output.1.tmp_0']]
+        teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2b_branch2a.conv2d.output.1.tmp_0'], ['res50_res4b_branch2a.conv2d.output.1.tmp_0', 'res50_res4c_branch2a.conv2d.output.1.tmp_0']]
-        student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch2c.conv2d.output.1.tmp_0'], ['res34_res3b_branch2a.conv2d.output.1.tmp_0', 'res34_res3b_branch2c.conv2d.output.1.tmp_0']]
+        student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch1.conv2d.output.1.tmp_0'], ['res34_res4b_branch2a.conv2d.output.1.tmp_0', 'res34_res4c_branch2a.conv2d.output.1.tmp_0']]
        distillation_loss_weight: 1
    l2_distiller:
        class: 'L2Distiller'

--- a/PaddleSlim/classification/distillation/run.sh
+++ b/PaddleSlim/classification/distillation/run.sh
@@ -44,12 +44,6 @@ python -u compress.py \
 > mobilenet_v1.log 2>&1 &
 tailf mobilenet_v1.log
-cd ${pretrain_dir}/ResNet50_pretrained
-for files in $(ls res50_*)
-    do mv $files ${files#*_}
-done
-cd -
 ## for mobilenet_v2 distillation
 #cd ${pretrain_dir}/ResNet50_pretrained
 #for files in $(ls res50_*)
@@ -67,12 +61,6 @@ cd -
 #--compress_config ./configs/mobilenetv2_resnet50_distillation.yaml\
 #> mobilenet_v2.log 2>&1 &
 #tailf mobilenet_v2.log
-#
-#cd ${pretrain_dir}/ResNet50_pretrained
-#for files in $(ls res50_*)
-#    do mv $files ${files#*_}
-#done
-#cd -
 ## for resnet34 distillation
 #cd ${pretrain_dir}/ResNet50_pretrained
@@ -91,9 +79,3 @@ cd -
 #--compress_config ./configs/resnet34_resnet50_distillation.yaml \
 #> resnet34.log 2>&1 &
 #tailf resnet34.log
-#
-#cd ${pretrain_dir}/ResNet50_pretrained
-#for files in $(ls res50_*)
-#    do mv $files ${files#*_}
-#done
-#cd -