From d159a6b389638b4e8dcafd1e115e163590516e46 Mon Sep 17 00:00:00 2001
From: Bai Yifan <me@ethanbai.com>
Date: Fri, 11 Oct 2019 13:07:59 +0800
Subject: [PATCH] Fix PaddleSlim distillation demo configs (#3505)

* fix FSP distiller teacher/student pairs in the distillation configs

* fix doc: list training hyperparameters in the distillation README
---
 .../classification/distillation/README.md     |  29 ++++-
 .../classification/distillation/compress.py   | 107 +++++++-----------
 .../mobilenetv1_resnet50_distillation.yaml    |   2 +-
 .../resnet34_resnet50_distillation.yaml       |   4 +-
 PaddleSlim/classification/distillation/run.sh |  18 ---
 5 files changed, 72 insertions(+), 88 deletions(-)

diff --git a/PaddleSlim/classification/distillation/README.md b/PaddleSlim/classification/distillation/README.md
index a9ee6f3d..13b83a3b 100755
--- a/PaddleSlim/classification/distillation/README.md
+++ b/PaddleSlim/classification/distillation/README.md
@@ -139,16 +139,30 @@ strategies:
 | baseline | 70.99%/89.68%     |
 | 蒸馏后     | -                 |
 
->训练超参：
+#### 训练超参
+
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 
 ### MobileNetV2
 
 | FLOPS    | top1_acc/top5_acc |
 | -------- | ----------------- |
 | baseline | 72.15%/90.65%     |
-| 蒸馏后     | -                 |
+| 蒸馏后     | 70.66%/90.42%                 |
 
->训练超参：
+#### 训练超参
+
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 
 ### ResNet34
 
@@ -157,6 +171,13 @@ strategies:
 | baseline | 74.57%/92.14%     |
 | 蒸馏后     | -                 |
 
->训练超参：
+#### 训练超参
+
+- batch size: 256
+- lr_strategy: piecewise_decay
+- step_epochs: 30, 60, 90
+- num_epochs: 120
+- l2_decay: 4e-5
+- init lr: 0.1
 
 ## FAQ
diff --git a/PaddleSlim/classification/distillation/compress.py b/PaddleSlim/classification/distillation/compress.py
index 8c6ac9ae..49a800f8 100644
--- a/PaddleSlim/classification/distillation/compress.py
+++ b/PaddleSlim/classification/distillation/compress.py
@@ -34,7 +34,6 @@ add_arg('pretrained_model', str,  None,                "Whether to use pretraine
 add_arg('teacher_model',    str,  None,          "Set the teacher network to use.")
 add_arg('teacher_pretrained_model', str,  None,                "Whether to use pretrained model.")
 add_arg('compress_config',  str,  None,                 "The config file for compression with yaml format.")
-add_arg('quant_only',       bool, False,                "Only do quantization-aware training.")
 # yapf: enable
 
 model_list = [m for m in dir(models) if "__" not in m]
@@ -50,45 +49,25 @@ def compress(args):
     # model definition
     model = models.__dict__[args.model]()
 
-    if args.model is "GoogleNet":
-        out0, out1, out2 = model.net(input=image, class_dim=args.class_dim)
-        cost0 = fluid.layers.cross_entropy(input=out0, label=label)
-        cost1 = fluid.layers.cross_entropy(input=out1, label=label)
-        cost2 = fluid.layers.cross_entropy(input=out2, label=label)
-        avg_cost0 = fluid.layers.mean(x=cost0)
-        avg_cost1 = fluid.layers.mean(x=cost1)
-        avg_cost2 = fluid.layers.mean(x=cost2)
-        avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
-        acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
+    if args.model == 'ResNet34':
+        model.prefix_name = 'res34'
+        out = model.net(input=image, class_dim=args.class_dim, fc_name='fc_0')
     else:
-        if args.model == 'ResNet34':
-            model.prefix_name = 'res34'
-            out = model.net(input=image,
-                            class_dim=args.class_dim,
-                            fc_name='fc_0')
-        else:
-            out = model.net(input=image, class_dim=args.class_dim)
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
+        out = model.net(input=image, class_dim=args.class_dim)
+    cost = fluid.layers.cross_entropy(input=out, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
+    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
     #print("="*50+"student_model_params"+"="*50)
     #for v in fluid.default_main_program().list_vars():
     #    print(v.name, v.shape)
+
     val_program = fluid.default_main_program().clone()
-    if args.quant_only:
-        boundaries = [
-            args.total_images / args.batch_size * 10,
-            args.total_images / args.batch_size * 16
-        ]
-        values = [1e-4, 1e-5, 1e-6]
-    else:
-        boundaries = [
-            args.total_images / args.batch_size * 30, args.total_images /
-            args.batch_size * 60, args.total_images / args.batch_size * 90
-        ]
-        values = [0.1, 0.01, 0.001, 0.0001]
+    boundaries = [
+        args.total_images / args.batch_size * 30, args.total_images /
+        args.batch_size * 60, args.total_images / args.batch_size * 90
+    ]
+    values = [0.1, 0.01, 0.001, 0.0001]
     opt = fluid.optimizer.Momentum(
         momentum=0.9,
         learning_rate=fluid.layers.piecewise_decay(
@@ -117,37 +96,39 @@ def compress(args):
 
     teacher_programs = []
     distiller_optimizer = None
-    if args.teacher_model:
-        teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
-        # define teacher program
-        teacher_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(teacher_program, startup_program):
-            img = teacher_program.global_block()._clone_variable(
-                image, force_persistable=False)
-            predict = teacher_model.net(img,
-                                        class_dim=args.class_dim,
-                                        fc_name='fc_0')
-        #print("="*50+"teacher_model_params"+"="*50)
-        #for v in teacher_program.list_vars():
-        #    print(v.name, v.shape)
-        exe.run(startup_program)
-        assert args.teacher_pretrained_model and os.path.exists(
-            args.teacher_pretrained_model
-        ), "teacher_pretrained_model should be set when teacher_model is not None."
 
-        def if_exist(var):
-            return os.path.exists(
-                os.path.join(args.teacher_pretrained_model, var.name))
+    teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
+    # define teacher program
+    teacher_program = fluid.Program()
+    startup_program = fluid.Program()
+    with fluid.program_guard(teacher_program, startup_program):
+        img = teacher_program.global_block()._clone_variable(
+            image, force_persistable=False)
+        predict = teacher_model.net(img,
+                                    class_dim=args.class_dim,
+                                    fc_name='fc_0')
+    #print("="*50+"teacher_model_params"+"="*50)
+    #for v in teacher_program.list_vars():
+    #    print(v.name, v.shape)
+    #return
+
+    exe.run(startup_program)
+    assert args.teacher_pretrained_model and os.path.exists(
+        args.teacher_pretrained_model
+    ), "teacher_pretrained_model should be set when teacher_model is not None."
+
+    def if_exist(var):
+        return os.path.exists(
+            os.path.join(args.teacher_pretrained_model, var.name))
 
-        fluid.io.load_vars(
-            exe,
-            args.teacher_pretrained_model,
-            main_program=teacher_program,
-            predicate=if_exist)
+    fluid.io.load_vars(
+        exe,
+        args.teacher_pretrained_model,
+        main_program=teacher_program,
+        predicate=if_exist)
 
-        distiller_optimizer = opt
-        teacher_programs.append(teacher_program.clone(for_test=True))
+    distiller_optimizer = opt
+    teacher_programs.append(teacher_program.clone(for_test=True))
 
     com_pass = Compressor(
         place,
diff --git a/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml b/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml
index 8ade55df..03c0d6ca 100644
--- a/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml
+++ b/PaddleSlim/classification/distillation/configs/mobilenetv1_resnet50_distillation.yaml
@@ -3,7 +3,7 @@ distillers:
     fsp_distiller:
         class: 'FSPDistiller'
         teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res3a_branch2a.conv2d.output.1.tmp_0']]
-        student_pairs: [['depthwise_conv2d_1.tmp_0', 'conv2d_3.tmp_0']]
+        student_pairs: [['depthwise_conv2d_1.tmp_0', 'depthwise_conv2d_2.tmp_0']]
         distillation_loss_weight: 1
     l2_distiller:
         class: 'L2Distiller'
diff --git a/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml b/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml
index 5013140b..4d4546e0 100644
--- a/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml
+++ b/PaddleSlim/classification/distillation/configs/resnet34_resnet50_distillation.yaml
@@ -2,8 +2,8 @@ version: 1.0
 distillers:
     fsp_distiller:
         class: 'FSPDistiller'
-        teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2a_branch2c.conv2d.output.1.tmp_0'], ['res50_res3b_branch2a.conv2d.output.1.tmp_0', 'res50_res3b_branch2c.conv2d.output.1.tmp_0']]
-        student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch2c.conv2d.output.1.tmp_0'], ['res34_res3b_branch2a.conv2d.output.1.tmp_0', 'res34_res3b_branch2c.conv2d.output.1.tmp_0']]
+        teacher_pairs: [['res50_res2a_branch2a.conv2d.output.1.tmp_0', 'res50_res2b_branch2a.conv2d.output.1.tmp_0'], ['res50_res4b_branch2a.conv2d.output.1.tmp_0', 'res50_res4c_branch2a.conv2d.output.1.tmp_0']]
+        student_pairs: [['res34_res2a_branch2a.conv2d.output.1.tmp_0', 'res34_res2a_branch1.conv2d.output.1.tmp_0'], ['res34_res4b_branch2a.conv2d.output.1.tmp_0', 'res34_res4c_branch2a.conv2d.output.1.tmp_0']]
         distillation_loss_weight: 1
     l2_distiller:
         class: 'L2Distiller'
diff --git a/PaddleSlim/classification/distillation/run.sh b/PaddleSlim/classification/distillation/run.sh
index e9d88f50..c18e6eee 100644
--- a/PaddleSlim/classification/distillation/run.sh
+++ b/PaddleSlim/classification/distillation/run.sh
@@ -44,12 +44,6 @@ python -u compress.py \
 > mobilenet_v1.log 2>&1 &
 tailf mobilenet_v1.log
 
-cd ${pretrain_dir}/ResNet50_pretrained
-for files in $(ls res50_*)
-    do mv $files ${files#*_}
-done
-cd -
-
 ## for mobilenet_v2 distillation
 #cd ${pretrain_dir}/ResNet50_pretrained
 #for files in $(ls res50_*)
@@ -67,12 +61,6 @@ cd -
 #--compress_config ./configs/mobilenetv2_resnet50_distillation.yaml\
 #> mobilenet_v2.log 2>&1 &
 #tailf mobilenet_v2.log
-#
-#cd ${pretrain_dir}/ResNet50_pretrained
-#for files in $(ls res50_*)
-#    do mv $files ${files#*_}
-#done
-#cd -
 
 ## for resnet34 distillation
 #cd ${pretrain_dir}/ResNet50_pretrained
@@ -91,9 +79,3 @@ cd -
 #--compress_config ./configs/resnet34_resnet50_distillation.yaml \
 #> resnet34.log 2>&1 &
 #tailf resnet34.log
-#
-#cd ${pretrain_dir}/ResNet50_pretrained
-#for files in $(ls res50_*)
-#    do mv $files ${files#*_}
-#done
-#cd -
-- 
GitLab