From 08b88c0c2b9752362eab478b4698cc74f2aee6e4 Mon Sep 17 00:00:00 2001 From: Bai Yifan <me@ethanbai.com> Date: Tue, 5 Jan 2021 11:30:53 +0800 Subject: [PATCH] distillation tutorial update (#578) (#583) --- .../quick_start/distillation_tutorial.md | 84 +++++++++++-------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/docs/zh_cn/quick_start/distillation_tutorial.md b/docs/zh_cn/quick_start/distillation_tutorial.md index aa2dd3e9..6a55a157 100755 --- a/docs/zh_cn/quick_start/distillation_tutorial.md +++ b/docs/zh_cn/quick_start/distillation_tutorial.md @@ -13,67 +13,69 @@ ## 1. 导入依赖 -PaddleSlim依赖Paddle1.7版本,请确认已正确安装Paddle,然后按以下方式导入Paddle和PaddleSlim: +PaddleSlim依赖Paddle2.0版本,请确认已正确安装Paddle,然后按以下方式导入Paddle和PaddleSlim: ``` import paddle -import paddle.fluid as fluid +import numpy as np import paddleslim as slim +paddle.enable_static() ``` ## 2. 定义student_program和teacher_program -本教程在MNIST数据集上进行知识蒸馏的训练和验证,输入图片尺寸为`[1, 28, 28]`,输出类别数为10。 +本教程在CIFAR数据集上进行知识蒸馏的训练和验证,输入图片尺寸为`[3, 32, 32]`,输出类别数为10。 选择`ResNet50`作为teacher对`MobileNet`结构的student进行蒸馏训练。 ```python model = slim.models.MobileNet() -student_program = fluid.Program() -student_startup = fluid.Program() -with fluid.program_guard(student_program, student_startup): - image = fluid.data( - name='image', shape=[None] + [1, 28, 28], dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') +student_program = paddle.static.Program() +student_startup = paddle.static.Program() +with paddle.static.program_guard(student_program, student_startup): + image = paddle.static.data( + name='image', shape=[None, 3, 32, 32], dtype='float32') + label = paddle.static.data(name='label', shape=[None, 1], dtype='int64') + gt = paddle.reshape(label, [-1, 1]) out = model.net(input=image, class_dim=10) - cost = fluid.layers.cross_entropy(input=out, label=label) - avg_cost = 
fluid.layers.mean(x=cost) - acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) + cost = paddle.nn.functional.loss.cross_entropy(input=out, label=gt) + avg_cost = paddle.mean(x=cost) + acc_top1 = paddle.metric.accuracy(input=out, label=gt, k=1) + acc_top5 = paddle.metric.accuracy(input=out, label=gt, k=5) ``` ```python -model = slim.models.ResNet50() -teacher_program = fluid.Program() -teacher_startup = fluid.Program() -with fluid.program_guard(teacher_program, teacher_startup): - with fluid.unique_name.guard(): - image = fluid.data( - name='image', shape=[None] + [1, 28, 28], dtype='float32') +teacher_model = slim.models.ResNet50() +teacher_program = paddle.static.Program() +teacher_startup = paddle.static.Program() +with paddle.static.program_guard(teacher_program, teacher_startup): + with paddle.utils.unique_name.guard(): + image = paddle.static.data( + name='image', shape=[None, 3, 32, 32], dtype='float32') predict = teacher_model.net(image, class_dim=10) -exe = fluid.Executor(fluid.CPUPlace()) +exe = paddle.static.Executor(paddle.CPUPlace()) exe.run(teacher_startup) ``` ## 3. 
选择特征图 -我们可以用student_的list_vars方法来观察其中全部的Variables,从中选出一个或多个变量(Variable)来拟合teacher相应的变量。 +我们可以用student_的list_vars方法来观察其中全部的Tensor,从中选出一个或多个变量(Tensor)来拟合teacher相应的变量。 ```python -# get all student variables +# get all student tensor student_vars = [] for v in student_program.list_vars(): student_vars.append((v.name, v.shape)) -#uncomment the following lines to observe student's variables for distillation +#uncomment the following lines to observe student's tensor for distillation #print("="*50+"student_model_vars"+"="*50) #print(student_vars) -# get all teacher variables +# get all teacher tensor teacher_vars = [] for v in teacher_program.list_vars(): teacher_vars.append((v.name, v.shape)) -#uncomment the following lines to observe teacher's variables for distillation +#uncomment the following lines to observe teacher's tensor for distillation #print("="*50+"teacher_model_vars"+"="*50) #print(teacher_vars) ``` @@ -81,33 +83,43 @@ for v in teacher_program.list_vars(): 经过筛选我们可以看到,teacher_program中的'bn5c_branch2b.output.1.tmp_3'和student_program的'depthwise_conv2d_11.tmp_0'尺寸一致,可以组成蒸馏损失函数。 ## 4. 
合并program (merge)并添加蒸馏loss -merge操作将student_program和teacher_program中的所有Variables和Op都将被添加到同一个Program中,同时为了避免两个program中有同名变量会引起命名冲突,merge也会为teacher_program中的Variables添加一个同一的命名前缀name_prefix,其默认值是'teacher_' +merge操作将student_program和teacher_program中的所有Tensor和Op都将被添加到同一个Program中,同时为了避免两个program中有同名变量会引起命名冲突,merge也会为teacher_program中的Tensor添加一个同一的命名前缀name_prefix,其默认值是'teacher_' 为了确保teacher网络和student网络输入的数据是一样的,merge操作也会对两个program的输入数据层进行合并操作,所以需要指定一个数据层名称的映射关系data_name_map,key是teacher的输入数据名称,value是student的 ```python data_name_map = {'image': 'image'} -main = slim.dist.merge(teacher_program, student_program, data_name_map, fluid.CPUPlace()) -with fluid.program_guard(student_program, student_startup): +main = slim.dist.merge(teacher_program, student_program, data_name_map, paddle.CPUPlace()) +with paddle.static.program_guard(student_program, student_startup): l2_loss = slim.dist.l2_loss('teacher_bn5c_branch2b.output.1.tmp_3', 'depthwise_conv2d_11.tmp_0', student_program) loss = l2_loss + avg_cost - opt = fluid.optimizer.Momentum(0.01, 0.9) + opt = paddle.optimizer.Momentum(0.01, 0.9) opt.minimize(loss) exe.run(student_startup) ``` ## 5. 
模型训练 -为了快速执行该示例,我们选取简单的MNIST数据,Paddle框架的`paddle.dataset.mnist`包定义了MNIST数据的下载和读取。 代码如下: +为了快速执行该示例,我们选取简单的CIFAR数据,Paddle框架的`paddle.vision.datasets.Cifar10`包定义了CIFAR10数据的下载和读取。 代码如下: ```python -train_reader = paddle.fluid.io.batch( - paddle.dataset.mnist.train(), batch_size=128, drop_last=True) -train_feeder = fluid.DataFeeder(['image', 'label'], fluid.CPUPlace(), student_program) +import paddle.vision.transforms as T +transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) +train_dataset = paddle.vision.datasets.Cifar10( + mode="train", backend="cv2", transform=transform) + +train_loader = paddle.io.DataLoader( + train_dataset, + places=paddle.CPUPlace(), + feed_list=[image, label], + drop_last=True, + batch_size=64, + return_list=False, + shuffle=True) ``` ```python -for data in train_reader(): - acc1, acc5, loss_np = exe.run(student_program, feed=train_feeder.feed(data), fetch_list=[acc_top1.name, acc_top5.name, loss.name]) +for idx, data in enumerate(train_loader): + acc1, acc5, loss_np = exe.run(student_program, feed=data, fetch_list=[acc_top1.name, acc_top5.name, loss.name]) print("Acc1: {:.6f}, Acc5: {:.6f}, Loss: {:.6f}".format(acc1.mean(), acc5.mean(), loss_np.mean())) ``` -- GitLab