蒸馏模型训练报错 (#467) · Issue · PaddlePaddle / PaddleSlim

蒸馏模型训练报错

Created by: 1558513572

使用 PaddleSlim 现成的蒸馏函数，训练时报错：

C:\Anaconda3\lib\site-packages\paddle\fluid\executor.py:789: UserWarning: The following exception is not an EOF exception.
  "The following exception is not an EOF exception.")
Traceback (most recent call last):
  File "C:/Users/虹虹/Desktop/加油4/qinglianghua_python/copy999999999999999.py", line 133, in <module>
    acc1, loss_np = exe.run(student_program, feed=train_feeder.feed(data), fetch_list=[acc_top1.name, avg_cost.name])
  File "C:\Anaconda3\lib\site-packages\paddle\fluid\executor.py", line 790, in run
    six.reraise(*sys.exc_info())
  File "C:\Anaconda3\lib\site-packages\six.py", line 703, in reraise
    raise value
  File "C:\Anaconda3\lib\site-packages\paddle\fluid\executor.py", line 785, in run
    use_program_cache=use_program_cache)
  File "C:\Anaconda3\lib\site-packages\paddle\fluid\executor.py", line 838, in _run_impl
    use_program_cache=use_program_cache)
  File "C:\Anaconda3\lib\site-packages\paddle\fluid\executor.py", line 912, in _run_program
    fetch_var_name)
paddle.fluid.core_avx.EnforceNotMet: 

--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
Windows not support stack backtrace yet.

------------------------------------------
Python Call Stacks (More useful to users):
------------------------------------------
  File "C:\Anaconda3\lib\site-packages\paddle\fluid\framework.py", line 2525, in append_op
    attrs=kwargs.get("attrs", None))
  File "C:\Anaconda3\lib\site-packages\paddleslim\dist\single_distiller.py", line 95, in merge
    type=op.type, inputs=inputs, outputs=outputs, attrs=attrs)
  File "C:/Users/虹虹/Desktop/加油4/qinglianghua_python/copy999999999999999.py", line 111, in <module>
    main = slim.dist.merge(teacher_program, student_program, data_name_map, fluid.CPUPlace())

----------------------
Error Message Summary:
----------------------
InvalidArgumentError: The Tensor in the conv2d Op's Input Variable Input(teacher_data) is not initialized.
  [Hint: Expected t->IsInitialized() == true, but received t->IsInitialized():0 != true:1.] at (D:\1.7.2\paddle\paddle\fluid\framework\operator.cc:1264)
  [operator < conv2d > error]

源代码：

1. 导入依赖
import paddle
import os
import paddle.fluid as fluid
import paddleslim as slim
import models
import reader
import numpy as np

定义student_program和teacher_program

# Architecture tokens handed to SANAS.tokens2arch() for each network.
tokens_student = [5, 13, 0, 0]  # 1 block (presumably matches block_num=1 below)
tokens_teacher = [5,13,0,0,3,1,2,1,4,16,0,1,3,18,0,0]  # 4 blocks (presumably matches block_num=4 in the teacher config)

# The student network lives in its own main/startup program pair.
student_program = fluid.Program()
student_startup = fluid.Program()

# Build the student graph: SANAS-selected backbone -> fc(5) -> cross-entropy
# loss and top-1 accuracy.
# NOTE: this block is NOT wrapped in fluid.unique_name.guard(); the later
# l2_loss call refers to the auto-generated name 'depthwise_conv2d_0.tmp_0',
# so the order in which layers are created here matters.
with fluid.program_guard(student_program, student_startup):

        config1 = [('MobileNetV2BlockSpace', {'input_size': 224, 'output_size': 112, 'block_num': 1})]
        # NOTE(review): is_server=True with its own port (9887 here) -- presumably
        # this starts a SANAS controller server just to decode tokens; confirm.
        # NOTE(review): save_checkpoint is a Linux path although the traceback
        # in this report comes from a Windows machine -- verify it is usable.
        sanas1 = slim.nas.SANAS(configs=config1, server_addr=("", 9887), init_temperature=1000, reduce_rate=0.99,
                            search_steps=None, init_tokens=None, save_checkpoint='/home/aistudio/work/nas_checkpoint',
                            load_checkpoint=None, is_server=True)
        # tokens2arch() returns a sequence; its first element is used as the backbone callable.
        model1 = sanas1.tokens2arch(tokens_student)[0]

        # Input variables. Their in-program names are 'data' and 'label'
        # (not the Python identifiers `image` / `label`).
        image = fluid.data(name='data', shape=[None, 3, 224, 224], dtype='float32')
        label = fluid.data(name='label', shape=[None, 1], dtype='int64')

        output = model1(image)
        # 5-way output layer.
        # NOTE(review): no activation is set on this fc, while
        # fluid.layers.cross_entropy is documented as taking probabilities
        # (softmax output) -- consider act='softmax'; confirm.
        out = fluid.layers.fc(input=output, size=5)

        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)

# A single CPU executor is shared by every exe.run() call in this script.
place = fluid.CPUPlace()
exe = fluid.Executor(place)

# The teacher network gets its own main/startup program pair.
teacher_program = fluid.Program()
teacher_startup = fluid.Program()

# Build the teacher graph: SANAS-selected backbone (block_num=4) -> fc(5) ->
# cross-entropy loss and top-1 accuracy.
with fluid.program_guard(teacher_program, teacher_startup):
        # presumably unique_name.guard() restarts the auto-generated name
        # counters, so teacher layers are numbered from _0 again (cf. the
        # 'teacher_depthwise_conv2d_0.tmp_0' name used after merging) -- confirm.
        with fluid.unique_name.guard():

                config3 = [('MobileNetV2BlockSpace', {'input_size': 224, 'output_size': 14, 'block_num': 4})]
                # NOTE(review): second SANAS instance, again is_server=True but on
                # port 9997 -- presumably a separate controller server; confirm.
                sanas3 = slim.nas.SANAS(configs=config3, server_addr=("", 9997), init_temperature=1000, reduce_rate=0.99,
                                search_steps=None, init_tokens=None,
                                save_checkpoint='/home/aistudio/work/nas_checkpoint', load_checkpoint=None,
                                is_server=True)
                # tokens2arch() returns a sequence; its first element is used as the backbone callable.
                model3 = sanas3.tokens2arch(tokens_teacher)[0]

                # Teacher inputs use the same in-program names as the student:
                # 'data' and 'label' (the Python identifiers image3/label3 are
                # not variable names inside the program).
                image3 = fluid.data(name='data', shape=[None, 3, 224, 224], dtype='float32')
                label3 = fluid.data(name='label', shape=[None, 1], dtype='int64')

                output3 = model3(image3)
                # NOTE(review): as in the student, fc has no activation before
                # cross_entropy -- confirm whether act='softmax' is intended.
                out3 = fluid.layers.fc(input=output3, size=5)

                # NOTE(review): these loss/accuracy ops read the 'label' input;
                # if the whole teacher program is merged into the student they
                # presumably come along too -- confirm that is wanted.
                cost3 = fluid.layers.cross_entropy(input=out3, label=label3)
                avg_cost3= fluid.layers.mean(x=cost3)
                acc_top13 = fluid.layers.accuracy(input=out3, label=label3, k=1)

# Run the teacher startup program to initialize its parameters.
# No pretrained weights are loaded anywhere in the visible script.
exe.run(teacher_startup)

合并program (merge)并添加蒸馏loss

# Map teacher input variables onto the student inputs they must share.
# Keys are variable names *inside teacher_program* and values are variable
# names *inside student_program* -- i.e. the `name=` passed to fluid.data(),
# not the Python identifiers (image3 / image). Both programs declare their
# inputs as name='data' and name='label'.
#
# The previous map {'image3': 'image'} matched no teacher variable, so the
# teacher input was renamed with the 'teacher_' prefix to 'teacher_data',
# never fed, and conv2d failed with
# "Input(teacher_data) is not initialized".
# 'label' is mapped as well because the teacher program contains loss and
# accuracy ops that read it.
data_name_map = {'data': 'data', 'label': 'label'}
main = slim.dist.merge(teacher_program, student_program, data_name_map, fluid.CPUPlace())

# Add the distillation loss and the optimizer to the merged student program.
with fluid.program_guard(student_program, student_startup):
    # L2 distance between a teacher feature map (carrying the 'teacher_'
    # prefix after the merge) and the student feature map of the same name.
    l2_loss = slim.dist.l2_loss('teacher_depthwise_conv2d_0.tmp_0', 'depthwise_conv2d_0.tmp_0', student_program)
    print(l2_loss)

    # Total objective: distillation term plus the student's own mean
    # cross-entropy.
    loss = l2_loss + avg_cost

    opt = fluid.optimizer.Momentum(0.01, 0.9)
    opt.minimize(loss)

# Initialize the student parameters (and the optimizer state added above).
exe.run(student_startup)

模型训练

# Batch the samples listed in 'target/train.list' into groups of 64,
# dropping a trailing incomplete batch.
train_reader = paddle.batch(
    reader=reader.train_reader('target/train.list', 224, 250),
    batch_size=64,
    drop_last=True,
)
# Converts each mini-batch into a feed dict for the student's inputs.
train_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Variables fetched on every step: student top-1 accuracy and mean
# cross-entropy (the distillation term itself is not fetched).
fetch_names = [acc_top1.name, avg_cost.name]

for pass_id in range(100):
    for batch_id, data in enumerate(train_reader()):
        step_feed = train_feeder.feed(data)
        acc1, loss_np = exe.run(
            student_program,
            feed=step_feed,
            fetch_list=fetch_names,
        )

        report = (pass_id, batch_id, acc1.mean(), loss_np.mean())
        print('Pass:%d, Batch:%d, Acc1:%0.5f, Loss:%0.5f' % report)

PaddlePaddle / PaddleSlim 大约 1 年前同步成功

蒸馏模型训练报错

PaddlePaddle / PaddleSlim
大约 1 年前同步成功