Migrate code to 2.0 version (#510)

40280329 · Bai Yifan · GitHub · 72bcb1eb · 40280329 · 40280329
27 changed files
--- a/demo/deep_mutual_learning/dml_train.py
+++ b/demo/deep_mutual_learning/dml_train.py
@@ -21,8 +21,7 @@ import sys
 import argparse
 import functools
 import logging
-import paddle.fluid as fluid
-from paddle.fluid.dygraph.base import to_variable
+import paddle
 from paddleslim.common import AvgrageMeter, get_logger
 from paddleslim.dist import DML
 from paddleslim.models.dygraph import MobileNetV1
@@ -46,28 +45,27 @@ add_arg('epochs',            int,   200,             "Epoch number.")
 add_arg('class_num',         int,   100,             "Class number of dataset.")
 add_arg('trainset_num',      int,   50000,           "Images number of trainset.")
 add_arg('model_save_dir',    str,   'saved_models',  "The path to save model.")
-add_arg('use_parallel',      bool,  False,           "Whether to use data parallel mode to train the model.")
 # yapf: enable


 def create_optimizer(models, args):
-    device_num = fluid.dygraph.parallel.Env().nranks
-    step = int(args.trainset_num / (args.batch_size * device_num))
+    step = int(args.trainset_num / (args.batch_size))
    epochs = [60, 120, 180]
    bd = [step * e for e in epochs]
    lr = [args.init_lr * (0.1**i) for i in range(len(bd) + 1)]

+    # One scheduler shared by all optimizers, so the caller's single lr.step() decays every model.
+    learning_rate = paddle.optimizer.lr.PiecewiseDecay(boundaries=bd, values=lr)
    optimizers = []
    for cur_model in models:
-        learning_rate = fluid.dygraph.PiecewiseDecay(bd, lr, 0)
-        opt = fluid.optimizer.MomentumOptimizer(
+        opt = paddle.optimizer.Momentum(
            learning_rate,
            0.9,
-            parameter_list=cur_model.parameters(),
+            parameters=cur_model.parameters(),
            use_nesterov=True,
-            regularization=fluid.regularizer.L2DecayRegularizer(5e-4))
+            weight_decay=paddle.regularizer.L2Decay(5e-4))
        optimizers.append(opt)
-    return optimizers
+    return optimizers, learning_rate


 def create_reader(place, args):
@@ -75,33 +73,31 @@ def create_reader(place, args):
        batch_size=args.batch_size, is_train=True, is_shuffle=True)
    valid_reader = reader.train_valid(
        batch_size=args.batch_size, is_train=False, is_shuffle=False)
-    if args.use_parallel:
-        train_reader = fluid.contrib.reader.distributed_batch_reader(
-            train_reader)
-    train_loader = fluid.io.DataLoader.from_generator(
+    train_loader = paddle.io.DataLoader.from_generator(
        capacity=1024, return_list=True)
-    valid_loader = fluid.io.DataLoader.from_generator(
+    valid_loader = paddle.io.DataLoader.from_generator(
        capacity=1024, return_list=True)
    train_loader.set_batch_generator(train_reader, places=place)
    valid_loader.set_batch_generator(valid_reader, places=place)
    return train_loader, valid_loader


-def train(train_loader, dml_model, dml_optimizer, args):
+def train(train_loader, dml_model, dml_optimizer, lr, args):
    dml_model.train()
    costs = [AvgrageMeter() for i in range(dml_model.model_num)]
    accs = [AvgrageMeter() for i in range(dml_model.model_num)]
    for step_id, (images, labels) in enumerate(train_loader):
-        images, labels = to_variable(images), to_variable(labels)
+        images, labels = paddle.to_tensor(images), paddle.to_tensor(labels)
        batch_size = images.shape[0]

        logits = dml_model.forward(images)
        precs = [
-            fluid.layers.accuracy(
+            paddle.metric.accuracy(
                input=l, label=labels, k=1) for l in logits
        ]
        losses = dml_model.loss(logits, labels)
        dml_optimizer.minimize(losses)
+        lr.step()

        for i in range(dml_model.model_num):
            accs[i].update(precs[i].numpy(), batch_size)
@@ -121,12 +117,12 @@ def valid(valid_loader, dml_model, args):
    costs = [AvgrageMeter() for i in range(dml_model.model_num)]
    accs = [AvgrageMeter() for i in range(dml_model.model_num)]
    for step_id, (images, labels) in enumerate(valid_loader):
-        images, labels = to_variable(images), to_variable(labels)
+        images, labels = paddle.to_tensor(images), paddle.to_tensor(labels)
        batch_size = images.shape[0]

        logits = dml_model.forward(images)
        precs = [
-            fluid.layers.accuracy(
+            paddle.metric.accuracy(
                input=l, label=labels, k=1) for l in logits
        ]
        losses = dml_model.loss(logits, labels)
@@ -146,65 +142,48 @@ def valid(valid_loader, dml_model, args):

 def main(args):
    if not args.use_gpu:
-        place = fluid.CPUPlace()
-    elif not args.use_parallel:
-        place = fluid.CUDAPlace(0)
+        place = paddle.CPUPlace()
    else:
-        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
-
-    with fluid.dygraph.guard(place):
-        # 1. Define data reader
-        train_loader, valid_loader = create_reader(place, args)
-
-        # 2. Define neural network
-        if args.models == "mobilenet-mobilenet":
-            models = [
-                MobileNetV1(class_dim=args.class_num),
-                MobileNetV1(class_dim=args.class_num)
-            ]
-        elif args.models == "mobilenet-resnet50":
-            models = [
-                MobileNetV1(class_dim=args.class_num),
-                ResNet(class_dim=args.class_num)
-            ]
-        else:
-            logger.info("You can define the model as you wish")
-            return
-        optimizers = create_optimizer(models, args)
-
-        # 3. Use PaddleSlim DML strategy
-        dml_model = DML(models, args.use_parallel)
-        dml_optimizer = dml_model.opt(optimizers)
-
-        # 4. Train your network
-        save_parameters = (not args.use_parallel) or (
-            args.use_parallel and fluid.dygraph.parallel.Env().local_rank == 0)
-        best_valid_acc = [0] * dml_model.model_num
-        for epoch_id in range(args.epochs):
-            current_step_lr = dml_optimizer.get_lr()
-            lr_msg = "Epoch {}".format(epoch_id)
-            for model_id, lr in enumerate(current_step_lr):
-                lr_msg += ", {} lr: {:.6f}".format(
-                    dml_model.full_name()[model_id], lr)
-            logger.info(lr_msg)
-            train_losses, train_accs = train(train_loader, dml_model,
-                                             dml_optimizer, args)
-            valid_losses, valid_accs = valid(valid_loader, dml_model, args)
-            for i in range(dml_model.model_num):
-                if valid_accs[i].avg[0] > best_valid_acc[i]:
-                    best_valid_acc[i] = valid_accs[i].avg[0]
-                    if save_parameters:
-                        fluid.save_dygraph(
-                            models[i].state_dict(),
-                            os.path.join(args.model_save_dir,
-                                         dml_model.full_name()[i],
-                                         "best_model"))
-                summery_msg = "Epoch {} {}: valid_loss {:.6f}, valid_acc {:.6f}, best_valid_acc {:.6f}"
-                logger.info(
-                    summery_msg.format(epoch_id,
-                                       dml_model.full_name()[i], valid_losses[
-                                           i].avg[0], valid_accs[i].avg[0],
-                                       best_valid_acc[i]))
+        place = paddle.CUDAPlace(0)
+
+    # 1. Define data reader
+    train_loader, valid_loader = create_reader(place, args)
+
+    # 2. Define neural network
+    if args.models == "mobilenet-mobilenet":
+        models = [
+            MobileNetV1(class_dim=args.class_num),
+            MobileNetV1(class_dim=args.class_num)
+        ]
+    elif args.models == "mobilenet-resnet50":
+        models = [
+            MobileNetV1(class_dim=args.class_num),
+            ResNet(class_dim=args.class_num)
+        ]
+    else:
+        logger.info("You can define the model as you wish")
+        return
+    optimizers, lr = create_optimizer(models, args)
+
+    # 3. Use PaddleSlim DML strategy
+    dml_model = DML(models)
+    dml_optimizer = dml_model.opt(optimizers)
+
+    # 4. Train your network
+    best_valid_acc = [0] * dml_model.model_num
+    for epoch_id in range(args.epochs):
+        train_losses, train_accs = train(train_loader, dml_model, dml_optimizer,
+                                         lr, args)
+        valid_losses, valid_accs = valid(valid_loader, dml_model, args)
+        for i in range(dml_model.model_num):
+            if valid_accs[i].avg[0] > best_valid_acc[i]:
+                best_valid_acc[i] = valid_accs[i].avg[0]
+            summery_msg = "Epoch {} {}: valid_loss {:.6f}, valid_acc {:.6f}, best_valid_acc {:.6f}"
+            logger.info(
+                summery_msg.format(epoch_id,
+                                   dml_model.full_name()[i], valid_losses[
+                                       i].avg[0], valid_accs[i].avg[0],
+                                   best_valid_acc[i]))


 if __name__ == '__main__':

--- a/demo/distillation/distill.py
+++ b/demo/distillation/distill.py
@@ -87,15 +87,14 @@ def create_optimizer(args):

 def compress(args):
    if args.data == "cifar10":
-        import paddle.dataset.cifar as reader
-        train_reader = reader.train10()
-        val_reader = reader.test10()
+        train_dataset = paddle.vision.datasets.Cifar10(mode='train')
+        val_dataset = paddle.vision.datasets.Cifar10(mode='test')
        class_dim = 10
        image_shape = "3,32,32"
    elif args.data == "imagenet":
        import imagenet_reader as reader
-        train_reader = reader.train()
-        val_reader = reader.val()
+        train_dataset = reader.ImageNetDataset(mode='train')
+        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
@@ -106,6 +105,9 @@ def compress(args):
                                                                     model_list)
    student_program = paddle.static.Program()
    s_startup = paddle.static.Program()
+    places = paddle.static.cuda_places(
+    ) if args.use_gpu else paddle.static.cpu_places()
+    place = places[0]

    with paddle.static.program_guard(student_program, s_startup):
        with paddle.fluid.unique_name.guard():
@@ -113,16 +115,23 @@ def compress(args):
                name='image', shape=[None] + image_shape, dtype='float32')
            label = paddle.static.data(
                name='label', shape=[None, 1], dtype='int64')
-            train_loader = paddle.io.DataLoader.from_generator(
+            train_loader = paddle.io.DataLoader(
+                train_dataset,
+                places=places,
                feed_list=[image, label],
-                capacity=64,
-                use_double_buffer=True,
-                iterable=True)
-            valid_loader = paddle.io.DataLoader.from_generator(
+                drop_last=True,
+                batch_size=args.batch_size,
+                shuffle=True,
+                use_shared_memory=False,
+                num_workers=1)
+            valid_loader = paddle.io.DataLoader(
+                val_dataset,
+                places=place,
                feed_list=[image, label],
-                capacity=64,
-                use_double_buffer=True,
-                iterable=True)
+                drop_last=False,
+                use_shared_memory=False,
+                batch_size=args.batch_size,
+                shuffle=False)
            # model definition
            model = models.__dict__[args.model]()
            out = model.net(input=image, class_dim=class_dim)
@@ -132,20 +141,9 @@ def compress(args):
            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

-    train_reader = paddle.batch(
-        train_reader, batch_size=args.batch_size, drop_last=True)
-    val_reader = paddle.batch(
-        val_reader, batch_size=args.batch_size, drop_last=True)
    val_program = student_program.clone(for_test=True)
-
-    places = paddle.static.cuda_places(
-    ) if args.use_gpu else paddle.static.cpu_places()
-    place = places[0]
    exe = paddle.static.Executor(place)

-    train_loader.set_sample_list_generator(train_reader, places)
-    valid_loader.set_sample_list_generator(val_reader, place)
-
    teacher_model = models.__dict__[args.teacher_model]()
    # define teacher program
    teacher_program = paddle.static.Program()

--- a/demo/distillation/image_classification_distillation_tutorial.ipynb
+++ b/demo/distillation/image_classification_distillation_tutorial.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# PaddleSlim Distillation知识蒸馏简介与实验\n",
-    "\n",
-    "一般情况下，模型参数量越多，结构越复杂，其性能越好，但参数也越冗余，运算量和资源消耗也越大。**知识蒸馏**就是一种将大模型学习到的有用信息（Dark Knowledge）压缩进更小更快的模型，而获得可以匹敌大模型结果的方法。\n",
-    "\n",
-    "在本文中性能强劲的大模型被称为teacher, 性能稍逊但体积较小的模型被称为student。示例包含以下步骤：\n",
-    "\n",
-    "1. 导入依赖\n",
-    "2. 定义student_program和teacher_program\n",
-    "3. 选择特征图\n",
-    "4. 合并program (merge)并添加蒸馏loss\n",
-    "5. 模型训练\n",
-    "\n",
-    "\n",
-    "## 1. 导入依赖\n",
-    "PaddleSlim依赖Paddle1.7版本，请确认已正确安装Paddle，然后按以下方式导入Paddle、PaddleSlim以及其他依赖:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import paddle\n",
-    "import paddle.fluid as fluid\n",
-    "import paddleslim as slim\n",
-    "import sys\n",
-    "sys.path.append(\"../\")\n",
-    "import models"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 2. 定义student_program和teacher_program\n",
-    "\n",
-    "本教程在MNIST数据集上进行知识蒸馏的训练和验证，输入图片尺寸为`[1, 28, 28]`，输出类别数为10。\n",
-    "选择`ResNet50`作为teacher对`MobileNet`结构的student进行蒸馏训练。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "model = models.__dict__['MobileNet']()\n",
-    "student_program = fluid.Program()\n",
-    "student_startup = fluid.Program()\n",
-    "with fluid.program_guard(student_program, student_startup):\n",
-    "    image = fluid.data(\n",
-    "        name='image', shape=[None] + [1, 28, 28], dtype='float32')\n",
-    "    label = fluid.data(name='label', shape=[None, 1], dtype='int64')\n",
-    "    out = model.net(input=image, class_dim=10)\n",
-    "    cost = fluid.layers.cross_entropy(input=out, label=label)\n",
-    "    avg_cost = fluid.layers.mean(x=cost)\n",
-    "    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)\n",
-    "    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "teacher_model = models.__dict__['ResNet50']()\n",
-    "teacher_program = fluid.Program()\n",
-    "teacher_startup = fluid.Program()\n",
-    "with fluid.program_guard(teacher_program, teacher_startup):\n",
-    "    with fluid.unique_name.guard():\n",
-    "        image = fluid.data(\n",
-    "            name='image', shape=[None] + [1, 28, 28], dtype='float32')\n",
-    "        predict = teacher_model.net(image, class_dim=10)\n",
-    "exe = fluid.Executor(fluid.CPUPlace())\n",
-    "exe.run(teacher_startup)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 3. 选择特征图\n",
-    "我们可以用student_的list_vars方法来观察其中全部的Variables，从中选出一个或多个变量（Variable）来拟合teacher相应的变量。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# get all student variables\n",
-    "student_vars = []\n",
-    "for v in student_program.list_vars():\n",
-    "    student_vars.append((v.name, v.shape))\n",
-    "#uncomment the following lines to observe student's variables for distillation\n",
-    "#print(\"=\"*50+\"student_model_vars\"+\"=\"*50)\n",
-    "#print(student_vars)\n",
-    "\n",
-    "# get all teacher variables\n",
-    "teacher_vars = []\n",
-    "for v in teacher_program.list_vars():\n",
-    "    teacher_vars.append((v.name, v.shape))\n",
-    "#uncomment the following lines to observe teacher's variables for distillation\n",
-    "#print(\"=\"*50+\"teacher_model_vars\"+\"=\"*50)\n",
-    "#print(teacher_vars)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "经过筛选我们可以看到，teacher_program中的'bn5c_branch2b.output.1.tmp_3'和student_program的'depthwise_conv2d_11.tmp_0'尺寸一致，可以组成蒸馏损失函数。"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 4. 合并program (merge)并添加蒸馏loss\n",
-    "merge操作将student_program和teacher_program中的所有Variables和Op都将被添加到同一个Program中，同时为了避免两个program中有同名变量会引起命名冲突，merge也会为teacher_program中的Variables添加一个同一的命名前缀name_prefix，其默认值是'teacher_'\n",
-    "\n",
-    "为了确保teacher网络和student网络输入的数据是一样的，merge操作也会对两个program的输入数据层进行合并操作，所以需要指定一个数据层名称的映射关系data_name_map，key是teacher的输入数据名称，value是student的"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_name_map = {'image': 'image'}\n",
-    "main = slim.dist.merge(teacher_program, student_program, data_name_map, fluid.CPUPlace())\n",
-    "with fluid.program_guard(student_program, student_startup):\n",
-    "    l2_loss = slim.dist.l2_loss('teacher_bn5c_branch2b.output.1.tmp_3', 'depthwise_conv2d_11.tmp_0', student_program)\n",
-    "    loss = l2_loss + avg_cost\n",
-    "    opt = fluid.optimizer.Momentum(0.01, 0.9)\n",
-    "    opt.minimize(loss)\n",
-    "exe.run(student_startup)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 5. 模型训练\n",
-    "\n",
-    "为了快速执行该示例，我们选取简单的MNIST数据，Paddle框架的`paddle.dataset.mnist`包定义了MNIST数据的下载和读取。\n",
-    "代码如下："
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_reader = paddle.fluid.io.batch(\n",
-    "    paddle.dataset.mnist.train(), batch_size=128, drop_last=True)\n",
-    "train_feeder = fluid.DataFeeder(['image', 'label'], fluid.CPUPlace(), student_program)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for data in train_reader():\n",
-    "    acc1, acc5, loss_np = exe.run(student_program, feed=train_feeder.feed(data), fetch_list=[acc_top1.name, acc_top5.name, loss.name])\n",
-    "    print(\"Acc1: {:.6f}, Acc5: {:.6f}, Loss: {:.6f}\".format(acc1.mean(), acc5.mean(), loss_np.mean()))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
--- a/demo/quant/pact_quant_aware/README.md
+++ b/demo/quant/pact_quant_aware/README.md
@@ -37,27 +37,27 @@ PACT方法属于自定义 `act_preprocess_func`, 输入是将要量化的激活

 ```
 import paddle
-import paddle.fluid as fluid
 from paddle.fluid.layer_helper import LayerHelper

-def pact(x, name=None):
+def pact(x):
    helper = LayerHelper("pact", **locals())
    dtype = 'float32'
    # 定义PACT初始阈值
    init_thres = 20
-    u_param_attr = fluid.ParamAttr(
+    u_param_attr = paddle.ParamAttr(
        name=x.name + '_pact',
-        initializer=fluid.initializer.ConstantInitializer(value=init_thres),
-        regularizer=fluid.regularizer.L2Decay(0.0001),
+        initializer=paddle.nn.initializer.Constant(value=init_thres),
+        regularizer=paddle.regularizer.L2Decay(0.0001),
        learning_rate=1)
    u_param = helper.create_parameter(
        attr=u_param_attr, shape=[1], dtype=dtype)
-    x = fluid.layers.elementwise_sub(
-        x, fluid.layers.relu(fluid.layers.elementwise_sub(x, u_param)))
-    x = fluid.layers.elementwise_add(
-        x, fluid.layers.relu(fluid.layers.elementwise_sub(-u_param, x)))

+    part_a = paddle.nn.functional.relu(x - u_param)
+    part_b = paddle.nn.functional.relu(-u_param - x)
+    x = x - part_a + part_b
    return x
+
+
 ```

 函数中可以定义初始阈值，和初始阈值的l2正则项系数，在训练过程中可根据梯度传播训练阈值为一个合适的值。
@@ -66,7 +66,7 @@ def pact(x, name=None):

 ```
 def get_optimizer():
-    return fluid.optimizer.MomentumOptimizer(0.001, 0.9)
+    return paddle.optimizer.Momentum(args.lr, 0.9)
 ```
 因为除了PACT阈值以外，其他参数都是训练好的，因此在训练时可以将PACT中阈值的学习率调大一些。

@@ -144,10 +144,10 @@ compiled_train_prog = quant_aware(train_prog, place, quant_config, scope=None, a
 ### 关掉指定build策略

 ```
-build_strategy = fluid.BuildStrategy()
+build_strategy = paddle.static.BuildStrategy()
 build_strategy.fuse_all_reduce_ops = False
 build_strategy.sync_batch_norm = False
-exec_strategy = fluid.ExecutionStrategy()
+exec_strategy = paddle.static.ExecutionStrategy()
 compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,

--- a/demo/quant/pact_quant_aware/train.py
+++ b/demo/quant/pact_quant_aware/train.py
@@ -110,15 +110,14 @@ def create_optimizer(args):
 def compress(args):

    if args.data == "mnist":
-        import paddle.dataset.mnist as reader
-        train_reader = reader.train()
-        val_reader = reader.test()
+        train_dataset = paddle.vision.datasets.MNIST(mode='train')
+        val_dataset = paddle.vision.datasets.MNIST(mode='test')
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
-        train_reader = reader.train()
-        val_reader = reader.val()
+        train_dataset = reader.ImageNetDataset(mode='train')
+        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
@@ -148,27 +147,27 @@ def compress(args):
        opt.minimize(avg_cost)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
+    places = paddle.static.cuda_places(
+    ) if args.use_gpu else paddle.static.cpu_places()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

-    train_reader = paddle.batch(
-        train_reader, batch_size=args.batch_size, drop_last=True)
-    train_loader = paddle.io.DataLoader.from_generator(
+    train_loader = paddle.io.DataLoader(
+        train_dataset,
+        places=places,
        feed_list=[image, label],
-        capacity=512,
-        use_double_buffer=True,
-        iterable=True)
-    places = paddle.static.cuda_places(
-    ) if args.use_gpu else paddle.static.cpu_places()
-    train_loader.set_sample_list_generator(train_reader, places)
-
-    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
-    valid_loader = paddle.io.DataLoader.from_generator(
+        drop_last=True,
+        batch_size=args.batch_size,
+        shuffle=True,
+        num_workers=1)
+
+    valid_loader = paddle.io.DataLoader(
+        val_dataset,
+        places=place,
        feed_list=[image, label],
-        capacity=512,
-        use_double_buffer=True,
-        iterable=True)
-    valid_loader.set_sample_list_generator(val_reader, places[0])
+        drop_last=False,
+        batch_size=args.batch_size,
+        shuffle=False)

    if args.analysis:
        # get all activations names

--- a/demo/quant/quant_aware/README.md
+++ b/demo/quant/quant_aware/README.md
@@ -55,10 +55,10 @@ compiled_train_prog = quant_aware(train_prog, place, quant_config, scope=None, f
 ### 关掉指定build策略

 ```
-build_strategy = fluid.BuildStrategy()
+build_strategy = paddle.static.BuildStrategy()
 build_strategy.fuse_all_reduce_ops = False
 build_strategy.sync_batch_norm = False
-exec_strategy = fluid.ExecutionStrategy()
+exec_strategy = paddle.static.ExecutionStrategy()
 compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,

--- a/demo/quant/quant_aware/image_classification_training_aware_quantization_quick_start.ipynb
+++ b/demo/quant/quant_aware/image_classification_training_aware_quantization_quick_start.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 图像分类模型量化训练-快速开始"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "该教程以图像分类模型MobileNetV1为例，说明如何快速使用PaddleSlim的[量化训练接口](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/docs/api/quantization_api.md)。 该示例包含以下步骤：\n",
-    "\n",
-    "1. 导入依赖\n",
-    "2. 构建模型\n",
-    "3. 训练模型\n",
-    "4. 量化\n",
-    "5. 训练和测试量化后的模型\n",
-    "6. 保存量化后的模型"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 1. 导入依赖\n",
-    "PaddleSlim依赖Paddle1.7版本，请确认已正确安装Paddle，然后按以下方式导入Paddle和PaddleSlim:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import paddle\n",
-    "import paddle.fluid as fluid\n",
-    "import paddleslim as slim\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 2. 构建网络\n",
-    "该章节构造一个用于对MNIST数据进行分类的分类模型，选用`MobileNetV1`，并将输入大小设置为`[1, 28, 28]`，输出类别数为10。               为了方便展示示例，我们在`paddleslim.models`下预定义了用于构建分类模型的方法，执行以下代码构建分类模型：\n",
-    "\n",
-    ">注意：paddleslim.models下的API并非PaddleSlim常规API，是为了简化示例而封装预定义的一系列方法，比如：模型结构的定义、Program的构建等。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "exe, train_program, val_program, inputs, outputs = \\\n",
-    "    slim.models.image_classification(\"MobileNet\", [1, 28, 28], 10, use_gpu=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 3. 训练模型\n",
-    "该章节介绍了如何定义输入数据和如何训练和测试分类模型。先训练分类模型的原因是量化训练过程是在训练好的模型上进行的，也就是说是在训练好的模型的基础上加入量化反量化op之后，用小学习率进行参数微调。\n",
-    "\n",
-    "### 3.1 定义输入数据\n",
-    "\n",
-    "为了快速执行该示例，我们选取简单的MNIST数据，Paddle框架的`paddle.dataset.mnist`包定义了MNIST数据的下载和读取。\n",
-    "代码如下："
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import paddle.dataset.mnist as reader\n",
-    "train_reader = paddle.fluid.io.batch(\n",
-    "        reader.train(), batch_size=128, drop_last=True)\n",
-    "test_reader = paddle.fluid.io.batch(\n",
-    "        reader.train(), batch_size=128, drop_last=True)\n",
-    "train_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3.2 训练和测试\n",
-    "先定义训练和测试函数，正常训练和量化训练时只需要调用函数即可。在训练函数中执行了一个epoch的训练，因为MNIST数据集数据较少，一个epoch就可将top1精度训练到95%以上。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def train(prog):\n",
-    "    iter = 0\n",
-    "    for data in train_reader():\n",
-    "        acc1, acc5, loss = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)\n",
-    "        if iter % 100 == 0:\n",
-    "            print('train iter={}, top1={}, top5={}, loss={}'.format(iter, acc1.mean(), acc5.mean(), loss.mean()))\n",
-    "        iter += 1\n",
-    "        \n",
-    "def test(prog):\n",
-    "    iter = 0\n",
-    "    res = [[], []]\n",
-    "    for data in train_reader():\n",
-    "        acc1, acc5, loss = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)\n",
-    "        if iter % 100 == 0:\n",
-    "            print('test iter={}, top1={}, top5={}, loss={}'.format(iter, acc1.mean(), acc5.mean(), loss.mean()))\n",
-    "        res[0].append(acc1.mean())\n",
-    "        res[1].append(acc5.mean())\n",
-    "        iter += 1\n",
-    "    print('final test result top1={}, top5={}'.format(np.array(res[0]).mean(), np.array(res[1]).mean()))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "调用``train``函数训练分类网络，``train_program``是在第2步：构建网络中定义的。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "train iter=0, top1=0.1171875, top5=0.546875, loss=2.79680204391\n",
-      "train iter=100, top1=0.9296875, top5=1.0, loss=0.305284500122\n",
-      "train iter=200, top1=0.9609375, top5=0.9921875, loss=0.158525630832\n",
-      "train iter=300, top1=0.9609375, top5=0.9921875, loss=0.146427512169\n",
-      "train iter=400, top1=0.9609375, top5=1.0, loss=0.179066047072\n"
-     ]
-    }
-   ],
-   "source": [
-    "train(train_program)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "调用``test``函数测试分类网络，``val_program``是在第2步：构建网络中定义的。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "test iter=0, top1=0.96875, top5=1.0, loss=0.0801232308149\n",
-      "test iter=100, top1=0.9609375, top5=1.0, loss=0.104892581701\n",
-      "test iter=200, top1=0.96875, top5=1.0, loss=0.156774014235\n",
-      "test iter=300, top1=0.984375, top5=1.0, loss=0.0931615754962\n",
-      "test iter=400, top1=0.9453125, top5=1.0, loss=0.184863254428\n",
-      "final test result top1=0.970469415188, top5=0.999282181263\n"
-     ]
-    }
-   ],
-   "source": [
-    "test(val_program)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 4. 量化\n",
-    "\n",
-    "按照[默认配置](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/#_1)在``train_program``和``val_program``中加入量化和反量化op."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2020-02-06 09:08:49,489-INFO: quant_aware config {'moving_rate': 0.9, 'weight_quantize_type': 'channel_wise_abs_max', 'is_full_quantize': False, 'dtype': 'int8', 'weight_bits': 8, 'window_size': 10000, 'activation_bits': 8, 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], 'not_quant_pattern': ['skip_quant'], 'activation_quantize_type': 'moving_average_abs_max', 'for_tensorrt': False}\n",
-      "2020-02-06 09:08:50,943-INFO: quant_aware config {'moving_rate': 0.9, 'weight_quantize_type': 'channel_wise_abs_max', 'is_full_quantize': False, 'dtype': 'int8', 'weight_bits': 8, 'window_size': 10000, 'activation_bits': 8, 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], 'not_quant_pattern': ['skip_quant'], 'activation_quantize_type': 'moving_average_abs_max', 'for_tensorrt': False}\n"
-     ]
-    }
-   ],
-   "source": [
-    "quant_program = slim.quant.quant_aware(train_program, exe.place, for_test=False)\n",
-    "val_quant_program = slim.quant.quant_aware(val_program, exe.place, for_test=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 5. 训练和测试量化后的模型\n",
-    "微调量化后的模型，训练一个epoch后测试。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "train iter=0, top1=0.953125, top5=1.0, loss=0.184170544147\n",
-      "train iter=100, top1=0.96875, top5=1.0, loss=0.0945074558258\n",
-      "train iter=200, top1=0.9765625, top5=1.0, loss=0.0915599390864\n",
-      "train iter=300, top1=0.9765625, top5=1.0, loss=0.0562560297549\n",
-      "train iter=400, top1=0.9609375, top5=1.0, loss=0.094195574522\n"
-     ]
-    }
-   ],
-   "source": [
-    "train(quant_program)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "测试量化后的模型，和``3.2 训练和测试``中得到的测试结果相比，精度相近，达到了无损量化。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "test iter=0, top1=0.984375, top5=1.0, loss=0.0542894415557\n",
-      "test iter=100, top1=0.9609375, top5=1.0, loss=0.0662319809198\n",
-      "test iter=200, top1=0.9609375, top5=1.0, loss=0.0832970961928\n",
-      "test iter=300, top1=0.9921875, top5=1.0, loss=0.0262515246868\n",
-      "test iter=400, top1=0.96875, top5=1.0, loss=0.123742781579\n",
-      "final test result top1=0.984057843685, top5=0.999799668789\n"
-     ]
-    }
-   ],
-   "source": [
-    "test(val_quant_program)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 6. 保存量化后的模型\n",
-    "\n",
-    "在``4. 量化``中使用接口``slim.quant.quant_aware``接口得到的模型只适合训练时使用，为了得到最终使用时的模型，需要使用[slim.quant.convert](https://paddlepaddle.github.io/PaddleSlim/api/quantization_api/#convert)接口，然后使用[fluid.io.save_inference_model](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/io_cn/save_inference_model_cn.html#save-inference-model)保存模型。``float_prog``的参数数据类型是float32，但是数据范围是int8, 保存之后可使用fluid或者paddle-lite加载使用，paddle-lite在使用时，会先将类型转换为int8。``int8_prog``的参数数据类型是int8, 保存后可看到量化后模型大小，不可加载使用。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2020-02-06 09:09:27,529-INFO: convert config {'moving_rate': 0.9, 'weight_quantize_type': 'channel_wise_abs_max', 'is_full_quantize': False, 'dtype': 'int8', 'weight_bits': 8, 'window_size': 10000, 'activation_bits': 8, 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], 'not_quant_pattern': ['skip_quant'], 'activation_quantize_type': 'moving_average_abs_max', 'for_tensorrt': False}\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "[u'save_infer_model/scale_0',\n",
-       " u'save_infer_model/scale_1',\n",
-       " u'save_infer_model/scale_2']"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "float_prog, int8_prog = slim.quant.convert(val_quant_program, exe.place, save_int8=True)\n",
-    "target_vars = [float_prog.global_block().var(name) for name in outputs]\n",
-    "fluid.io.save_inference_model(dirname='./inference_model/float',\n",
-    "        feeded_var_names=[var.name for var in inputs],\n",
-    "        target_vars=target_vars,\n",
-    "        executor=exe,\n",
-    "        main_program=float_prog)\n",
-    "fluid.io.save_inference_model(dirname='./inference_model/int8',\n",
-    "        feeded_var_names=[var.name for var in inputs],\n",
-    "        target_vars=target_vars,\n",
-    "        executor=exe,\n",
-    "        main_program=int8_prog)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
--- a/demo/quant/quant_aware/train.py
+++ b/demo/quant/quant_aware/train.py
@@ -105,18 +105,15 @@ def compress(args):
        'moving_rate': 0.9,
    }

-    train_reader = None
-    test_reader = None
    if args.data == "mnist":
-        import paddle.dataset.mnist as reader
-        train_reader = reader.train()
-        val_reader = reader.test()
+        train_dataset = paddle.vision.datasets.MNIST(mode='train')
+        val_dataset = paddle.vision.datasets.MNIST(mode='test')
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
-        train_reader = reader.train()
-        val_reader = reader.val()
+        train_dataset = reader.ImageNetDataset(mode='train')
+        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
        image_shape = "3,224,224"
    else:
@@ -162,24 +159,26 @@ def compress(args):
    if args.pretrained_model:
        paddle.static.load(train_prog, args.pretrained_model, exe)

-    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
-    train_reader = paddle.batch(
-        train_reader, batch_size=args.batch_size, drop_last=True)
    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()

-    train_loader = paddle.io.DataLoader.from_generator(
+    train_loader = paddle.io.DataLoader(
+        train_dataset,
+        places=places,
        feed_list=[image, label],
-        capacity=512,
-        use_double_buffer=True,
-        iterable=True)
-    valid_loader = paddle.io.DataLoader.from_generator(
+        drop_last=True,
+        batch_size=args.batch_size,
+        use_shared_memory=False,
+        shuffle=True,
+        num_workers=1)
+    valid_loader = paddle.io.DataLoader(
+        val_dataset,
+        places=place,
        feed_list=[image, label],
-        capacity=512,
-        use_double_buffer=True,
-        iterable=True)
-    train_loader.set_sample_list_generator(train_reader, places)
-    valid_loader.set_sample_list_generator(val_reader, places[0])
+        drop_last=False,
+        batch_size=args.batch_size,
+        use_shared_memory=False,
+        shuffle=False)

    def test(epoch, program):
        batch_id = 0

--- a/demo/quant/quant_embedding/cluster_train.py
+++ b/demo/quant/quant_embedding/cluster_train.py
@@ -7,13 +7,12 @@ import math
 import random
 import numpy as np
 import paddle
-import paddle.fluid as fluid
 import six
 import reader
 from net import skip_gram_word2vec

 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("fluid")
+logger = logging.getLogger("paddle")
 logger.setLevel(logging.INFO)


@@ -113,20 +112,20 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
            if len(result[0]) == batch_size:
                tensor_result = []
                for tensor in result:
-                    t = fluid.Tensor()
+                    t = paddle.fluid.Tensor()
                    dat = np.array(tensor, dtype='int64')
                    if len(dat.shape) > 2:
                        dat = dat.reshape((dat.shape[0], dat.shape[2]))
                    elif len(dat.shape) == 1:
                        dat = dat.reshape((-1, 1))
-                    t.set(dat, fluid.CPUPlace())
+                    t.set(dat, paddle.CPUPlace())
                    tensor_result.append(t)
-                tt = fluid.Tensor()
+                tt = paddle.fluid.Tensor()
                neg_array = cs.searchsorted(np.random.sample(args.nce_num))
                neg_array = np.tile(neg_array, batch_size)
                tt.set(
                    neg_array.reshape((batch_size, args.nce_num)),
-                    fluid.CPUPlace())
+                    paddle.CPUPlace())
                tensor_result.append(tt)
                yield tensor_result
                result = [[], []]
@@ -134,15 +133,15 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
    return __reader__


-def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
-               weight):
+def train_loop(args, train_program, reader, py_reader, loss, trainer_id, weight,
+               lr):

    py_reader.decorate_tensor_provider(
        convert_python_to_tensor(weight, args.batch_size, reader.train()))

-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
+    place = paddle.CPUPlace()
+    exe = paddle.static.Executor(place)
+    exe.run(paddle.static.default_startup_program())

    print("CPU_NUM:" + str(os.getenv("CPU_NUM")))

@@ -173,23 +172,24 @@ def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
                            os.getenv("CPU_NUM"))
                        logger.info("Time used: {}, Samples/Sec: {}".format(
                            elapsed, samples / elapsed))
+                lr.step()

                if batch_id % args.save_step == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/pass-' + str(
                        pass_id) + ('/batch-' + str(batch_id))
                    if trainer_id == 0:
-                        fluid.io.save_params(executor=exe, dirname=model_dir)
+                        paddle.static.save(train_program, model_dir)
                        print("model saved in %s" % model_dir)
                batch_id += 1

-        except fluid.core.EOFException:
+        except paddle.fluid.core.EOFException:
            py_reader.reset()
            epoch_end = time.time()
            logger.info("Epoch: {0}, Train total expend: {1} ".format(
                pass_id, epoch_end - epoch_start))
            model_dir = args.model_output_dir + '/pass-' + str(pass_id)
            if trainer_id == 0:
-                fluid.io.save_params(executor=exe, dirname=model_dir)
+                paddle.static.save(train_program, model_dir)
                print("model saved in %s" % model_dir)


@@ -203,8 +203,8 @@ def train(args):
        os.mkdir(args.model_output_dir)

    filelist = GetFileList(args.train_data_dir)
-    word2vec_reader = reader.Word2VecReader(
-        args.dict_path, args.train_data_dir, filelist, 0, 1)
+    word2vec_reader = reader.Word2VecReader(args.dict_path, args.train_data_dir,
+                                            filelist, 0, 1)

    logger.info("dict_size: {}".format(word2vec_reader.dict_size))
    np_power = np.power(np.array(word2vec_reader.id_frequencys), 0.75)
@@ -216,18 +216,16 @@ def train(args):
        is_sparse=args.is_sparse,
        neg_num=args.nce_num)

-    optimizer = fluid.optimizer.SGD(
-        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=args.base_lr,
-            decay_steps=100000,
-            decay_rate=0.999,
-            staircase=True))
+    learning_rate = paddle.optimizer.lr.ExponentialDecay(
+        args.base_lr, gamma=0.999)
+
+    optimizer = paddle.optimizer.SGD(learning_rate=learning_rate)

    optimizer.minimize(loss)

    logger.info("run dist training")

-    t = fluid.DistributeTranspiler()
+    t = paddle.fluid.DistributeTranspiler()
    t.transpile(
        args.trainer_id, pservers=args.endpoints, trainers=args.trainers)
    if args.role == "pserver":
@@ -235,14 +233,14 @@ def train(args):
        pserver_prog = t.get_pserver_program(args.current_endpoint)
        pserver_startup = t.get_startup_program(args.current_endpoint,
                                                pserver_prog)
-        exe = fluid.Executor(fluid.CPUPlace())
+        exe = paddle.static.Executor(paddle.CPUPlace())
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif args.role == "trainer":
        print("run trainer")
        train_loop(args,
                   t.get_trainer_program(), word2vec_reader, py_reader, loss,
-                   args.trainer_id, id_frequencys_pow)
+                   args.trainer_id, id_frequencys_pow, learning_rate)


 if __name__ == '__main__':

--- a/demo/quant/quant_embedding/infer.py
+++ b/demo/quant/quant_embedding/infer.py
@@ -6,7 +6,6 @@ import unittest
 import contextlib
 import numpy as np
 import six
-import paddle.fluid as fluid
 import paddle
 import net
 import utils
@@ -63,19 +62,18 @@ def parse_args():

 def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):
    """ inference function """
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    exe = fluid.Executor(place)
+    place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+    exe = paddle.static.Executor(place)
    emb_size = args.emb_size
    batch_size = args.batch_size
-    with fluid.scope_guard(fluid.Scope()):
-        main_program = fluid.Program()
-        with fluid.program_guard(main_program):
+    with paddle.static.scope_guard(paddle.static.Scope()):
+        main_program = paddle.static.Program()
+        with paddle.static.program_guard(main_program):
            values, pred = net.infer_network(vocab_size, emb_size)
            for epoch in range(start_index, last_index + 1):
                copy_program = main_program.clone()
                model_path = model_dir + "/pass-" + str(epoch)
-                fluid.io.load_params(
-                    executor=exe, dirname=model_path, main_program=copy_program)
+                paddle.static.load(copy_program, model_path, exe)
                if args.emb_quant:
                    config = {
                        'quantize_op_types': 'lookup_table',
@@ -84,10 +82,8 @@ def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):
                        },
                    }
                    copy_program = quant_embedding(copy_program, place, config)
-                    fluid.io.save_persistables(
-                        exe,
-                        './output_quant/pass-' + str(epoch),
-                        main_program=copy_program)
+                    paddle.static.save(copy_program,
+                                       './output_quant/pass-' + str(epoch))

                accum_num = 0
                accum_num_sum = 0.0
@@ -139,23 +135,20 @@ def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):

 def infer_step(args, vocab_size, test_reader, use_cuda, i2w):
    """ inference function """
-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    exe = fluid.Executor(place)
+    place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+    exe = paddle.static.Executor(place)
    emb_size = args.emb_size
    batch_size = args.batch_size
-    with fluid.scope_guard(fluid.Scope()):
-        main_program = fluid.Program()
-        with fluid.program_guard(main_program):
+    with paddle.static.scope_guard(paddle.static.Scope()):
+        main_program = paddle.static.Program()
+        with paddle.static.program_guard(main_program):
            values, pred = net.infer_network(vocab_size, emb_size)
            for epoch in range(start_index, last_index + 1):
                for batchid in range(args.start_batch, args.end_batch):
                    copy_program = main_program.clone()
                    model_path = model_dir + "/pass-" + str(epoch) + (
                        '/batch-' + str(batchid * args.print_step))
-                    fluid.io.load_params(
-                        executor=exe,
-                        dirname=model_path,
-                        main_program=copy_program)
+                    paddle.static.load(copy_program, model_path, exe)
                    accum_num = 0
                    accum_num_sum = 0.0
                    t0 = time.time()

--- a/demo/quant/quant_embedding/net.py
+++ b/demo/quant/quant_embedding/net.py
@@ -17,16 +17,19 @@ neural network for word2vec
 from __future__ import print_function
 import math
 import numpy as np
+import paddle
 import paddle.fluid as fluid


 def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):

    datas = []
-    input_word = fluid.layers.data(name="input_word", shape=[1], dtype='int64')
-    true_word = fluid.layers.data(name='true_label', shape=[1], dtype='int64')
-    neg_word = fluid.layers.data(
-        name="neg_label", shape=[neg_num], dtype='int64')
+    input_word = paddle.static.data(
+        name="input_word", shape=[None, 1], dtype='int64')
+    true_word = paddle.static.data(
+        name='true_label', shape=[None, 1], dtype='int64')
+    neg_word = paddle.static.data(
+        name="neg_label", shape=[None, neg_num], dtype='int64')

    datas.append(input_word)
    datas.append(true_word)
@@ -36,60 +39,60 @@ def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
        capacity=64, feed_list=datas, name='py_reader', use_double_buffer=True)

    words = fluid.layers.read_file(py_reader)
+    words[0] = paddle.reshape(words[0], [-1])
+    words[1] = paddle.reshape(words[1], [-1])
    init_width = 0.5 / embedding_size
-    input_emb = fluid.layers.embedding(
+    input_emb = paddle.static.nn.embedding(
        input=words[0],
        is_sparse=is_sparse,
        size=[dict_size, embedding_size],
-        param_attr=fluid.ParamAttr(
+        param_attr=paddle.ParamAttr(
            name='emb',
-            initializer=fluid.initializer.Uniform(-init_width, init_width)))
+            initializer=paddle.nn.initializer.Uniform(-init_width, init_width)))

-    true_emb_w = fluid.layers.embedding(
+    true_emb_w = paddle.static.nn.embedding(
        input=words[1],
        is_sparse=is_sparse,
        size=[dict_size, embedding_size],
-        param_attr=fluid.ParamAttr(
-            name='emb_w', initializer=fluid.initializer.Constant(value=0.0)))
+        param_attr=paddle.ParamAttr(
+            name='emb_w',
+            initializer=paddle.nn.initializer.Constant(value=0.0)))

-    true_emb_b = fluid.layers.embedding(
+    true_emb_b = paddle.static.nn.embedding(
        input=words[1],
        is_sparse=is_sparse,
        size=[dict_size, 1],
-        param_attr=fluid.ParamAttr(
-            name='emb_b', initializer=fluid.initializer.Constant(value=0.0)))
-    neg_word_reshape = fluid.layers.reshape(words[2], shape=[-1, 1])
+        param_attr=paddle.ParamAttr(
+            name='emb_b',
+            initializer=paddle.nn.initializer.Constant(value=0.0)))
+    neg_word_reshape = paddle.reshape(words[2], shape=[-1])
    neg_word_reshape.stop_gradient = True

-    neg_emb_w = fluid.layers.embedding(
+    neg_emb_w = paddle.static.nn.embedding(
        input=neg_word_reshape,
        is_sparse=is_sparse,
        size=[dict_size, embedding_size],
-        param_attr=fluid.ParamAttr(
+        param_attr=paddle.ParamAttr(
            name='emb_w', learning_rate=1.0))

-    neg_emb_w_re = fluid.layers.reshape(
+    neg_emb_w_re = paddle.reshape(
        neg_emb_w, shape=[-1, neg_num, embedding_size])
-    neg_emb_b = fluid.layers.embedding(
+    neg_emb_b = paddle.static.nn.embedding(
        input=neg_word_reshape,
        is_sparse=is_sparse,
        size=[dict_size, 1],
-        param_attr=fluid.ParamAttr(
+        param_attr=paddle.ParamAttr(
            name='emb_b', learning_rate=1.0))

-    neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num])
-    true_logits = fluid.layers.elementwise_add(
-        fluid.layers.reduce_sum(
-            fluid.layers.elementwise_mul(input_emb, true_emb_w),
-            dim=1,
-            keep_dim=True),
-        true_emb_b)
-    input_emb_re = fluid.layers.reshape(
-        input_emb, shape=[-1, 1, embedding_size])
+    neg_emb_b_vec = paddle.reshape(neg_emb_b, shape=[-1, neg_num])
+    true_logits = paddle.add(paddle.sum(
+        paddle.multiply(input_emb, true_emb_w), axis=1, keepdim=True),
+                             true_emb_b)
+    input_emb_re = paddle.reshape(input_emb, shape=[-1, 1, embedding_size])
    neg_matmul = fluid.layers.matmul(
        input_emb_re, neg_emb_w_re, transpose_y=True)
-    neg_matmul_re = fluid.layers.reshape(neg_matmul, shape=[-1, neg_num])
-    neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec)
+    neg_matmul_re = paddle.reshape(neg_matmul, shape=[-1, neg_num])
+    neg_logits = paddle.add(neg_matmul_re, neg_emb_b_vec)
    #nce loss

    label_ones = fluid.layers.fill_constant_batch_size_like(
@@ -101,36 +104,36 @@ def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
                                                               label_ones)
    neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(neg_logits,
                                                              label_zeros)
-    cost = fluid.layers.elementwise_add(
-        fluid.layers.reduce_sum(
-            true_xent, dim=1),
-        fluid.layers.reduce_sum(
-            neg_xent, dim=1))
-    avg_cost = fluid.layers.reduce_mean(cost)
+    cost = paddle.add(paddle.sum(true_xent, axis=1),
+                      paddle.sum(neg_xent, axis=1))
+    avg_cost = paddle.mean(cost)
    return avg_cost, py_reader


 def infer_network(vocab_size, emb_size):
-    analogy_a = fluid.layers.data(name="analogy_a", shape=[1], dtype='int64')
-    analogy_b = fluid.layers.data(name="analogy_b", shape=[1], dtype='int64')
-    analogy_c = fluid.layers.data(name="analogy_c", shape=[1], dtype='int64')
-    all_label = fluid.layers.data(
-        name="all_label",
-        shape=[vocab_size, 1],
-        dtype='int64',
-        append_batch_size=False)
-    emb_all_label = fluid.layers.embedding(
+    analogy_a = paddle.static.data(
+        name="analogy_a", shape=[None, 1], dtype='int64')
+    analogy_b = paddle.static.data(
+        name="analogy_b", shape=[None, 1], dtype='int64')
+    analogy_c = paddle.static.data(
+        name="analogy_c", shape=[None, 1], dtype='int64')
+    all_label = paddle.static.data(
+        name="all_label", shape=[vocab_size, 1], dtype='int64')
+    all_label = paddle.reshape(all_label, [-1])
+    emb_all_label = paddle.static.nn.embedding(
        input=all_label, size=[vocab_size, emb_size], param_attr="emb")

-    emb_a = fluid.layers.embedding(
+    analogy_a = paddle.reshape(analogy_a, [-1])
+    emb_a = paddle.static.nn.embedding(
        input=analogy_a, size=[vocab_size, emb_size], param_attr="emb")
-    emb_b = fluid.layers.embedding(
+    analogy_b = paddle.reshape(analogy_b, [-1])
+    emb_b = paddle.static.nn.embedding(
        input=analogy_b, size=[vocab_size, emb_size], param_attr="emb")
-    emb_c = fluid.layers.embedding(
+    analogy_c = paddle.reshape(analogy_c, [-1])
+    emb_c = paddle.static.nn.embedding(
        input=analogy_c, size=[vocab_size, emb_size], param_attr="emb")
-    target = fluid.layers.elementwise_add(
-        fluid.layers.elementwise_sub(emb_b, emb_a), emb_c)
+    target = paddle.add(paddle.add(emb_b, -emb_a), emb_c)
    emb_all_label_l2 = fluid.layers.l2_normalize(x=emb_all_label, axis=1)
    dist = fluid.layers.matmul(x=target, y=emb_all_label_l2, transpose_y=True)
-    values, pred_idx = fluid.layers.topk(input=dist, k=4)
+    values, pred_idx = paddle.topk(x=dist, k=4)
    return values, pred_idx
--- a/demo/quant/quant_embedding/preprocess.py
+++ b/demo/quant/quant_embedding/preprocess.py
@@ -11,7 +11,7 @@ prog = re.compile("[^a-z ]", flags=0)

 def parse_args():
    parser = argparse.ArgumentParser(
-        description="Paddle Fluid word2 vector preprocess")
+        description="Paddle word2 vector preprocess")
    parser.add_argument(
        '--build_dict_corpus_dir', type=str, help="The dir of corpus")
    parser.add_argument(
@@ -112,8 +112,7 @@ def filter_corpus(args):
    for file in os.listdir(args.input_corpus_dir):
        with io.open(args.output_corpus_dir + '/convert_' + file, "w") as wf:
            with io.open(
-                    args.input_corpus_dir + '/' + file,
-                    encoding='utf-8') as rf:
+                    args.input_corpus_dir + '/' + file, encoding='utf-8') as rf:
                print(args.input_corpus_dir + '/' + file)
                for line in rf:
                    signal = False
@@ -153,8 +152,7 @@ def build_dict(args):

    for file in os.listdir(args.build_dict_corpus_dir):
        with io.open(
-                args.build_dict_corpus_dir + "/" + file,
-                encoding='utf-8') as f:
+                args.build_dict_corpus_dir + "/" + file, encoding='utf-8') as f:
            print("build dict : ", args.build_dict_corpus_dir + "/" + file)
            for line in f:
                line = text_strip(line)

--- a/demo/quant/quant_embedding/reader.py
+++ b/demo/quant/quant_embedding/reader.py
@@ -8,7 +8,7 @@ import random
 import io

 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("fluid")
+logger = logging.getLogger("paddle")
 logger.setLevel(logging.INFO)


@@ -64,8 +64,8 @@ class Word2VecReader(object):
        self.id_frequencys = [
            float(count) / word_all_count for count in self.id_counts_
        ]
-        print("dict_size = " + str(self.dict_size) + " word_all_count = " +
-              str(word_all_count))
+        print("dict_size = " + str(self.dict_size) + " word_all_count = " + str(
+            word_all_count))

        self.random_generator = NumpyRandomInt(1, self.window_size_ + 1)


--- a/demo/quant/quant_embedding/train.py
+++ b/demo/quant/quant_embedding/train.py
@@ -7,13 +7,12 @@ import math
 import random
 import numpy as np
 import paddle
-import paddle.fluid as fluid
 import six
 import reader
 from net import skip_gram_word2vec

 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("fluid")
+logger = logging.getLogger("paddle")
 logger.setLevel(logging.INFO)


@@ -91,20 +90,20 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
            if len(result[0]) == batch_size:
                tensor_result = []
                for tensor in result:
-                    t = fluid.Tensor()
+                    t = paddle.fluid.Tensor()
                    dat = np.array(tensor, dtype='int64')
                    if len(dat.shape) > 2:
                        dat = dat.reshape((dat.shape[0], dat.shape[2]))
                    elif len(dat.shape) == 1:
                        dat = dat.reshape((-1, 1))
-                    t.set(dat, fluid.CPUPlace())
+                    t.set(dat, paddle.CPUPlace())
                    tensor_result.append(t)
-                tt = fluid.Tensor()
+                tt = paddle.fluid.Tensor()
                neg_array = cs.searchsorted(np.random.sample(args.nce_num))
                neg_array = np.tile(neg_array, batch_size)
                tt.set(
                    neg_array.reshape((batch_size, args.nce_num)),
-                    fluid.CPUPlace())
+                    paddle.CPUPlace())
                tensor_result.append(tt)
                yield tensor_result
                result = [[], []]
@@ -112,32 +111,28 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
    return __reader__


-def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
-               weight):
+def train_loop(args, train_program, reader, py_reader, loss, trainer_id, weight,
+               lr):

    py_reader.decorate_tensor_provider(
        convert_python_to_tensor(weight, args.batch_size, reader.train()))

-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
+    place = paddle.CPUPlace()
+    exe = paddle.static.Executor(place)
+    exe.run(paddle.static.default_startup_program())

-    exec_strategy = fluid.ExecutionStrategy()
+    exec_strategy = paddle.static.ExecutionStrategy()
    exec_strategy.use_experimental_executor = True

    print("CPU_NUM:" + str(os.getenv("CPU_NUM")))
    exec_strategy.num_threads = int(os.getenv("CPU_NUM"))

-    build_strategy = fluid.BuildStrategy()
+    build_strategy = paddle.static.BuildStrategy()
    if int(os.getenv("CPU_NUM")) > 1:
-        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
+        build_strategy.reduce_strategy = paddle.static.BuildStrategy.ReduceStrategy.Reduce

-    train_exe = fluid.ParallelExecutor(
-        use_cuda=False,
-        loss_name=loss.name,
-        main_program=train_program,
-        build_strategy=build_strategy,
-        exec_strategy=exec_strategy)
+    program = paddle.static.CompiledProgram(train_program).with_data_parallel(
+        loss_name=loss.name, build_strategy=build_strategy)

    for pass_id in range(args.num_passes):
        py_reader.start()
@@ -148,7 +143,7 @@ def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
        try:
            while True:

-                loss_val = train_exe.run(fetch_list=[loss.name])
+                loss_val = exe.run(program, fetch_list=[loss.name])
                loss_val = np.mean(loss_val)

                if batch_id % args.print_batch == 0:
@@ -164,23 +159,24 @@ def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
                            os.getenv("CPU_NUM"))
                        logger.info("Time used: {}, Samples/Sec: {}".format(
                            elapsed, samples / elapsed))
+                lr.step()

                if batch_id % args.save_step == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/pass-' + str(
                        pass_id) + ('/batch-' + str(batch_id))
                    if trainer_id == 0:
-                        fluid.io.save_params(executor=exe, dirname=model_dir)
+                        paddle.static.save(train_program, model_dir)
                        print("model saved in %s" % model_dir)
                batch_id += 1

-        except fluid.core.EOFException:
+        except paddle.fluid.core.EOFException:
            py_reader.reset()
            epoch_end = time.time()
            logger.info("Epoch: {0}, Train total expend: {1} ".format(
                pass_id, epoch_end - epoch_start))
            model_dir = args.model_output_dir + '/pass-' + str(pass_id)
            if trainer_id == 0:
-                fluid.io.save_params(executor=exe, dirname=model_dir)
+                paddle.static.save(train_program, model_dir)
                print("model saved in %s" % model_dir)


@@ -207,20 +203,18 @@ def train(args):
        is_sparse=args.is_sparse,
        neg_num=args.nce_num)

-    optimizer = fluid.optimizer.SGD(
-        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=args.base_lr,
-            decay_steps=100000,
-            decay_rate=0.999,
-            staircase=True))
+    learning_rate = paddle.optimizer.lr.ExponentialDecay(
+        args.base_lr, gamma=0.999)
+
+    optimizer = paddle.optimizer.SGD(learning_rate=learning_rate)

    optimizer.minimize(loss)

    # do local training 
    logger.info("run local training")
-    main_program = fluid.default_main_program()
+    main_program = paddle.static.default_main_program()
    train_loop(args, main_program, word2vec_reader, py_reader, loss, 0,
-               id_frequencys_pow)
+               id_frequencys_pow, learning_rate)


 if __name__ == '__main__':

--- a/demo/quant/quant_embedding/utils.py
+++ b/demo/quant/quant_embedding/utils.py
@@ -3,7 +3,6 @@ import collections
 import six
 import time
 import numpy as np
-import paddle.fluid as fluid
 import paddle
 import os
 import preprocess

--- a/demo/quant/quant_post/README.md
+++ b/demo/quant/quant_post/README.md
@@ -17,7 +17,7 @@
 - ``'val_list.txt'``文件

 ### 准备需要量化的模型
-因为离线量化接口只支持加载通过``fluid.io.save_inference_model``接口保存的模型，因此如果您的模型是通过其他接口保存的，那需要先将模型进行转化。本示例将以分类模型为例进行说明。
+因为离线量化接口只支持加载通过``paddle.static.save_inference_model``接口保存的模型，因此如果您的模型是通过其他接口保存的，那需要先将模型进行转化。本示例将以分类模型为例进行说明。

 首先在[imagenet分类模型](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification#%E5%B7%B2%E5%8F%91%E5%B8%83%E6%A8%A1%E5%9E%8B%E5%8F%8A%E5%85%B6%E6%80%A7%E8%83%BD)中下载训练好的``mobilenetv1``模型。


--- a/demo/quant/quant_post/image_classification_post_training_quantization_quick_start.ipynb
+++ b/demo/quant/quant_post/image_classification_post_training_quantization_quick_start.ipynb
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 图像分类模型离线量化-快速开始"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "该教程以图像分类模型MobileNetV1为例，说明如何快速使用PaddleSlim的[离线量化接口](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/docs/docs/api/quantization_api.md)。 该示例包含以下步骤：\n",
-    "\n",
-    "1. 导入依赖\n",
-    "2. 构建模型\n",
-    "3. 训练模型\n",
-    "4. 离线量化"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 1. 导入依赖\n",
-    "PaddleSlim依赖Paddle1.7版本，请确认已正确安装Paddle，然后按以下方式导入Paddle和PaddleSlim:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import paddle\n",
-    "import paddle.fluid as fluid\n",
-    "import paddleslim as slim\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 2. 构建网络\n",
-    "该章节构造一个用于对MNIST数据进行分类的分类模型，选用`MobileNetV1`，并将输入大小设置为`[1, 28, 28]`，输出类别数为10。               为了方便展示示例，我们在`paddleslim.models`下预定义了用于构建分类模型的方法，执行以下代码构建分类模型：\n",
-    "\n",
-    ">注意：paddleslim.models下的API并非PaddleSlim常规API，是为了简化示例而封装预定义的一系列方法，比如：模型结构的定义、Program的构建等。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "exe, train_program, val_program, inputs, outputs = \\\n",
-    "    slim.models.image_classification(\"MobileNet\", [1, 28, 28], 10, use_gpu=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 3. 训练模型\n",
-    "该章节介绍了如何定义输入数据和如何训练和测试分类模型。先训练分类模型的原因是离线量化需要一个训练好的模型。\n",
-    "\n",
-    "### 3.1 定义输入数据\n",
-    "\n",
-    "为了快速执行该示例，我们选取简单的MNIST数据，Paddle框架的`paddle.dataset.mnist`包定义了MNIST数据的下载和读取。\n",
-    "代码如下："
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import paddle.dataset.mnist as reader\n",
-    "train_reader = paddle.fluid.io.batch(\n",
-    "        reader.train(), batch_size=128, drop_last=True)\n",
-    "test_reader = paddle.fluid.io.batch(\n",
-    "        reader.train(), batch_size=128, drop_last=True)\n",
-    "train_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3.2 训练和测试\n",
-    "先定义训练和测试函数。在训练函数中执行了一个epoch的训练，因为MNIST数据集数据较少，一个epoch就可将top1精度训练到95%以上。\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def train(prog):\n",
-    "    iter = 0\n",
-    "    for data in train_reader():\n",
-    "        acc1, acc5, loss = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)\n",
-    "        if iter % 100 == 0:\n",
-    "            print('train', acc1.mean(), acc5.mean(), loss.mean())\n",
-    "        iter += 1\n",
-    "        \n",
-    "def test(prog, outputs=outputs):\n",
-    "    iter = 0\n",
-    "    res = [[], []]\n",
-    "    for data in train_reader():\n",
-    "        acc1, acc5, loss = exe.run(prog, feed=train_feeder.feed(data), fetch_list=outputs)\n",
-    "        if iter % 100 == 0:\n",
-    "            print('test', acc1.mean(), acc5.mean(), loss.mean())\n",
-    "        res[0].append(acc1.mean())\n",
-    "        res[1].append(acc5.mean())\n",
-    "        iter += 1\n",
-    "    print('final test result', np.array(res[0]).mean(), np.array(res[1]).mean())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "调用``train``函数训练分类网络，``train_program``是在第2步：构建网络中定义的。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "('train', 0.0625, 0.5234375, 2.6373053)\n",
-      "('train', 0.9375, 0.9921875, 0.20106347)\n",
-      "('train', 0.953125, 1.0, 0.13234669)\n",
-      "('train', 0.96875, 0.9921875, 0.18056682)\n",
-      "('train', 0.9453125, 1.0, 0.15847622)\n"
-     ]
-    }
-   ],
-   "source": [
-    "train(train_program)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "调用``test``函数测试分类网络，``val_program``是在第2步：构建网络中定义的。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "('test', 0.9609375, 0.9921875, 0.12996897)\n",
-      "('test', 0.9609375, 1.0, 0.094265014)\n",
-      "('test', 0.9453125, 1.0, 0.10511534)\n",
-      "('test', 0.9765625, 1.0, 0.11341806)\n",
-      "('test', 0.953125, 1.0, 0.17046008)\n",
-      "('final test result', 0.9647603, 0.99943244)\n"
-     ]
-    }
-   ],
-   "source": [
-    "test(val_program)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "保存inference model，将训练好的分类模型保存在``'./inference_model'``下，后续进行离线量化时将加载保存在此处的模型。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[u'save_infer_model/scale_0',\n",
-       " u'save_infer_model/scale_1',\n",
-       " u'save_infer_model/scale_2']"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "target_vars = [val_program.global_block().var(name) for name in outputs]\n",
-    "fluid.io.save_inference_model(dirname='./inference_model',\n",
-    "        feeded_var_names=[var.name for var in inputs],\n",
-    "        target_vars=target_vars,\n",
-    "        executor=exe,\n",
-    "        main_program=val_program)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 4. 离线量化\n",
-    "\n",
-    "调用离线量化接口，加载文件夹``'./inference_model'``训练好的分类模型，并使用10个batch的数据进行参数校正。此过程无需训练，只需跑前向过程来计算量化所需参数。离线量化后的模型保存在文件夹``'./quant_post_model'``下。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2020-02-06 09:32:42,944-INFO: run batch: 0\n",
-      "2020-02-06 09:32:42,944-INFO: run batch: 0\n",
-      "2020-02-06 09:32:43,233-INFO: run batch: 5\n",
-      "2020-02-06 09:32:43,233-INFO: run batch: 5\n",
-      "2020-02-06 09:32:43,362-INFO: all run batch: 10\n",
-      "2020-02-06 09:32:43,362-INFO: all run batch: 10\n",
-      "2020-02-06 09:32:43,365-INFO: calculate scale factor ...\n",
-      "2020-02-06 09:32:43,365-INFO: calculate scale factor ...\n",
-      "2020-02-06 09:32:54,841-INFO: update the program ...\n",
-      "2020-02-06 09:32:54,841-INFO: update the program ...\n"
-     ]
-    }
-   ],
-   "source": [
-    "slim.quant.quant_post(\n",
-    "        executor=exe,\n",
-    "        model_dir='./inference_model',\n",
-    "        quantize_model_path='./quant_post_model',\n",
-    "        sample_generator=reader.test(),\n",
-    "        batch_nums=10)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "加载保存在文件夹``'./quant_post_model'``下的量化后的模型进行测试，可看到精度和``3.2 训练和测试``中得到的测试精度相近，因此离线量化过程对于此分类模型几乎无损。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "('test', 0.9765625, 0.9921875, 0.11411239)\n",
-      "('test', 0.953125, 1.0, 0.111179784)\n",
-      "('test', 0.953125, 1.0, 0.101078615)\n",
-      "('test', 0.96875, 1.0, 0.0993958)\n",
-      "('test', 0.9609375, 1.0, 0.16066414)\n",
-      "('final test result', 0.9643096, 0.99931556)\n"
-     ]
-    }
-   ],
-   "source": [
-    "quant_post_prog, feed_target_names, fetch_targets = fluid.io.load_inference_model(\n",
-    "        dirname='./quant_post_model',\n",
-    "        model_filename='__model__',\n",
-    "        params_filename='__params__',\n",
-    "        executor=exe)\n",
-    "test(quant_post_prog, fetch_targets)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
--- a/paddleslim/dist/dml.py
+++ b/paddleslim/dist/dml.py
@@ -18,28 +18,17 @@ from __future__ import print_function

 import copy
 import paddle
-import paddle.fluid as fluid
+import paddle.nn as nn

-PADDLE_VERSION = 1.8
-try:
-    from paddle.fluid.layers import log_softmax
-except:
-    from paddle.nn import LogSoftmax
-    PADDLE_VERSION = 2.0
+from paddle.nn import LogSoftmax


-class DML(fluid.dygraph.Layer):
+class DML(nn.Layer):
    def __init__(self, model, use_parallel=False):
        super(DML, self).__init__()
        self.model = model
        self.use_parallel = use_parallel
        self.model_num = len(self.model)
-        if self.use_parallel:
-            strategy = fluid.dygraph.parallel.prepare_context()
-            self.model = [
-                fluid.dygraph.parallel.DataParallel(m, strategy)
-                for m in self.model
-            ]

    def full_name(self):
        return [m.full_name() for m in self.model]
@@ -61,8 +50,9 @@ class DML(fluid.dygraph.Layer):
        ce_losses = []
        for i in range(self.model_num):
            ce_losses.append(
-                fluid.layers.mean(
-                    fluid.layers.softmax_with_cross_entropy(logits[i], labels)))
+                paddle.mean(
+                    paddle.nn.functional.softmax_with_cross_entropy(logits[i],
+                                                                    labels)))
        return ce_losses

    def kl_loss(self, logits):
@@ -76,13 +66,11 @@ class DML(fluid.dygraph.Layer):
            cur_kl_loss = 0
            for j in range(self.model_num):
                if i != j:
-                    if PADDLE_VERSION == 2.0:
-                        log_softmax = LogSoftmax(axis=1)
-                        x = log_softmax(logits[i])
-                    else:
-                        x = fluid.layers.log_softmax(logits[i], axis=1)
-                    y = fluid.layers.softmax(logits[j], axis=1)
-                    cur_kl_loss += fluid.layers.kldiv_loss(
+                    log_softmax = LogSoftmax(axis=1)
+                    x = log_softmax(logits[i])
+
+                    y = nn.functional.softmax(logits[j], axis=1)
+                    cur_kl_loss += nn.functional.kl_div(
                        x, y, reduction='batchmean')
            kl_losses.append(cur_kl_loss / (self.model_num - 1))
        return kl_losses
@@ -97,7 +85,7 @@ class DML(fluid.dygraph.Layer):

    def acc(self, logits, labels, k):
        accs = [
-            fluid.layers.accuracy(
+            paddle.metric.accuracy(
                input=l, label=labels, k=k) for l in logits
        ]
        return accs

--- a/paddleslim/quant/__init__.py
+++ b/paddleslim/quant/__init__.py
@@ -14,14 +14,14 @@

 import logging

-import paddle.fluid as fluid
+import paddle
 import paddle.version as fluid_version
 from ..common import get_logger

 _logger = get_logger(__name__, level=logging.INFO)

 try:
-    fluid.require_version('1.8.4')
+    paddle.utils.require_version('1.8.4')
    version_installed = [
        fluid_version.major, fluid_version.minor, fluid_version.patch,
        fluid_version.rc

--- a/tests/test_analysis_helper.py
+++ b/tests/test_analysis_helper.py
@@ -15,7 +15,6 @@ import sys
 sys.path.append("../")
 import unittest
 import paddle
-import paddle.fluid as fluid
 from paddleslim.common import VarCollector
 from static_case import StaticCase
 sys.path.append("../demo")
@@ -27,34 +26,38 @@ import numpy as np

 class TestAnalysisHelper(StaticCase):
    def test_analysis_helper(self):
-        image = fluid.layers.data(
-            name='image', shape=[1, 28, 28], dtype='float32')
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        image = paddle.static.data(
+            name='image', shape=[None, 1, 28, 28], dtype='float32')
+        label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-        optimizer = fluid.optimizer.Momentum(
+        cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
+        avg_cost = paddle.mean(x=cost)
+        acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
+        acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
+        optimizer = paddle.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
-            regularization=fluid.regularizer.L2Decay(4e-5))
+            weight_decay=paddle.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
-        main_prog = fluid.default_main_program()
+        main_prog = paddle.static.default_main_program()

-        places = fluid.cuda_places() if fluid.is_compiled_with_cuda(
-        ) else fluid.cpu_places()
-        exe = fluid.Executor(places[0])
-        train_reader = paddle.fluid.io.batch(
-            paddle.dataset.mnist.train(), batch_size=64)
-        train_loader = fluid.io.DataLoader.from_generator(
+        places = paddle.static.cuda_places() if paddle.is_compiled_with_cuda(
+        ) else paddle.static.cpu_places()
+        exe = paddle.static.Executor(places[0])
+
+        def transform(x):
+            return np.reshape(x, [1, 28, 28])
+
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', backend='cv2', transform=transform)
+        train_loader = paddle.io.DataLoader(
+            train_dataset,
+            places=places,
            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)
-        train_loader.set_sample_list_generator(train_reader, places)
-        exe.run(fluid.default_startup_program())
+            drop_last=True,
+            batch_size=64)
+        exe.run(paddle.static.default_startup_program())

        vars = ['conv2d_0.tmp_0', 'fc_0.tmp_0', 'fc_0.tmp_1', 'fc_0.tmp_2']
        var_collector1 = VarCollector(main_prog, vars, use_ema=True)

--- a/tests/test_darts.py
+++ b/tests/test_darts.py
@@ -15,24 +15,23 @@ import sys
 sys.path.append("../")
 import paddle
 import unittest
-import paddle.fluid as fluid
 import numpy as np
 from static_case import StaticCase
 from paddleslim.nas.darts import DARTSearch
 from layers import conv_bn_layer


-class TestDARTS(StaticCase):
+class TestDARTS(unittest.TestCase):
    def test_darts(self):
-        class SuperNet(fluid.dygraph.Layer):
+        class SuperNet(paddle.nn.Layer):
            def __init__(self):
                super(SuperNet, self).__init__()
                self._method = 'DARTS'
                self._steps = 1
-                self.stem = fluid.dygraph.nn.Conv2D(
-                    num_channels=1, num_filters=3, filter_size=3, padding=1)
-                self.classifier = fluid.dygraph.nn.Linear(
-                    input_dim=2352, output_dim=10)
+                self.stem = paddle.nn.Conv2D(
+                    in_channels=1, out_channels=3, kernel_size=3, padding=1)
+                self.classifier = paddle.nn.Linear(
+                    in_features=2352, out_features=10)
                self._multiplier = 4
                self._primitives = [
                    'none', 'max_pool_3x3', 'avg_pool_3x3', 'skip_connect',
@@ -42,9 +41,9 @@ class TestDARTS(StaticCase):
                self._initialize_alphas()

            def _initialize_alphas(self):
-                self.alphas_normal = fluid.layers.create_parameter(
+                self.alphas_normal = self.create_parameter(
                    shape=[14, 8], dtype="float32")
-                self.alphas_reduce = fluid.layers.create_parameter(
+                self.alphas_reduce = self.create_parameter(
                    shape=[14, 8], dtype="float32")
                self._arch_parameters = [
                    self.alphas_normal,
@@ -57,14 +56,15 @@ class TestDARTS(StaticCase):
            def forward(self, input):
                out = self.stem(input) * self.alphas_normal[0][
                    0] * self.alphas_reduce[0][0]
-                out = fluid.layers.reshape(out, [0, -1])
+                out = paddle.reshape(out, [0, -1])
                logits = self.classifier(out)
                return logits

            def _loss(self, input, label):
                logits = self.forward(input)
-                return fluid.layers.reduce_mean(
-                    fluid.layers.softmax_with_cross_entropy(logits, label))
+                return paddle.mean(
+                    paddle.nn.functional.softmax_with_cross_entropy(logits,
+                                                                    label))

        def batch_generator(reader):
            def wrapper():
@@ -84,16 +84,15 @@ class TestDARTS(StaticCase):

            return wrapper

-        place = fluid.CUDAPlace(0)
-        with fluid.dygraph.guard(place):
-            model = SuperNet()
-            trainset = paddle.dataset.mnist.train()
-            validset = paddle.dataset.mnist.test()
-            train_reader = batch_generator(trainset)
-            valid_reader = batch_generator(validset)
-            searcher = DARTSearch(
-                model, train_reader, valid_reader, place, num_epochs=5)
-            searcher.train()
+        place = paddle.CUDAPlace(0)
+        model = SuperNet()
+        trainset = paddle.dataset.mnist.train()
+        validset = paddle.dataset.mnist.test()
+        train_reader = batch_generator(trainset)
+        valid_reader = batch_generator(validset)
+        searcher = DARTSearch(
+            model, train_reader, valid_reader, place, num_epochs=5)
+        searcher.train()


 if __name__ == '__main__':

--- a/tests/test_deep_mutual_learning.py
+++ b/tests/test_deep_mutual_learning.py
@@ -17,83 +17,73 @@ import unittest
 import logging
 import numpy as np
 import paddle
-from static_case import StaticCase
 import paddle.fluid as fluid
+from static_case import StaticCase
 import paddle.dataset.mnist as reader
-from paddle.fluid.dygraph.base import to_variable
 from paddleslim.models.dygraph import MobileNetV1
 from paddleslim.dist import DML
 from paddleslim.common import get_logger
 logger = get_logger(__name__, level=logging.INFO)


-class Model(fluid.dygraph.Layer):
+class Model(paddle.nn.Layer):
    def __init__(self):
        super(Model, self).__init__()
-        self.conv = fluid.dygraph.nn.Conv2D(
-            num_channels=1,
-            num_filters=256,
-            filter_size=3,
-            stride=1,
-            padding=1,
-            use_cudnn=False)
-        self.pool2d_avg = fluid.dygraph.nn.Pool2D(
-            pool_type='avg', global_pooling=True)
-        self.out = fluid.dygraph.nn.Linear(256, 10)
+        self.conv = paddle.nn.Conv2D(
+            in_channels=1, out_channels=256, kernel_size=3, stride=1, padding=1)
+        self.pool2d_avg = paddle.nn.Pool2D(pool_type='avg', global_pooling=True)
+        self.out = paddle.nn.Linear(256, 10)

    def forward(self, inputs):
-        inputs = fluid.layers.reshape(inputs, shape=[0, 1, 28, 28])
+        inputs = paddle.reshape(inputs, shape=[0, 1, 28, 28])
        y = self.conv(inputs)
        y = self.pool2d_avg(y)
-        y = fluid.layers.reshape(y, shape=[-1, 256])
+        y = paddle.reshape(y, shape=[-1, 256])
        y = self.out(y)
        return y


-class TestDML(StaticCase):
+class TestDML(unittest.TestCase):
    def test_dml(self):
-        place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
-        ) else fluid.CPUPlace()
-        with fluid.dygraph.guard(place):
-            train_reader = paddle.fluid.io.batch(
-                paddle.dataset.mnist.train(), batch_size=256)
-            train_loader = fluid.io.DataLoader.from_generator(
-                capacity=1024, return_list=True)
-            train_loader.set_sample_list_generator(train_reader, places=place)
+        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
+        ) else paddle.CPUPlace()
+
+        def transform(x):
+            return np.reshape(x, [1, 28, 28])
+
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', backend='cv2', transform=transform)
+        train_loader = paddle.io.DataLoader(
+            train_dataset, places=place, drop_last=True, batch_size=64)

-            models = [Model(), Model()]
-            optimizers = []
-            for cur_model in models:
-                opt = fluid.optimizer.MomentumOptimizer(
-                    0.1, 0.9, parameter_list=cur_model.parameters())
-                optimizers.append(opt)
-            dml_model = DML(models)
-            dml_optimizer = dml_model.opt(optimizers)
+        models = [Model(), Model()]
+        optimizers = []
+        for cur_model in models:
+            opt = paddle.optimizer.Momentum(
+                0.1, 0.9, parameters=cur_model.parameters())
+            optimizers.append(opt)
+        dml_model = DML(models)
+        dml_optimizer = dml_model.opt(optimizers)

-            def train(train_loader, dml_model, dml_optimizer):
-                dml_model.train()
-                for step_id, (images, labels) in enumerate(train_loader):
-                    images, labels = to_variable(images), to_variable(labels)
-                    labels = fluid.layers.reshape(labels, [0, 1])
+        def train(train_loader, dml_model, dml_optimizer):
+            dml_model.train()
+            for step_id, (images, labels) in enumerate(train_loader):
+                images, labels = paddle.to_tensor(images), paddle.to_tensor(
+                    labels)
+                labels = paddle.reshape(labels, [0, 1])

-                    logits = dml_model.forward(images)
-                    precs = [
-                        fluid.layers.accuracy(
-                            input=l, label=labels, k=1).numpy() for l in logits
-                    ]
-                    losses = dml_model.loss(logits, labels)
-                    dml_optimizer.minimize(losses)
-                    if step_id % 10 == 0:
-                        print(step_id, precs)
+                logits = dml_model.forward(images)
+                precs = [
+                    paddle.metric.accuracy(
+                        input=l, label=labels, k=1).numpy() for l in logits
+                ]
+                losses = dml_model.loss(logits, labels)
+                dml_optimizer.minimize(losses)
+                if step_id % 10 == 0:
+                    print(step_id, precs)

-            for epoch_id in range(10):
-                current_step_lr = dml_optimizer.get_lr()
-                lr_msg = "Epoch {}".format(epoch_id)
-                for model_id, lr in enumerate(current_step_lr):
-                    lr_msg += ", {} lr: {:.6f}".format(
-                        dml_model.full_name()[model_id], lr)
-                logger.info(lr_msg)
-                train(train_loader, dml_model, dml_optimizer)
+        for epoch_id in range(10):
+            train(train_loader, dml_model, dml_optimizer)


 if __name__ == '__main__':

--- a/tests/test_dygraph_quant_aware.py
+++ b/tests/test_dygraph_quant_aware.py
@@ -20,8 +20,6 @@ import logging
 import paddle
 import paddle.nn as nn
 import paddle.fluid as fluid
-from paddle.fluid.optimizer import AdamOptimizer
-from paddle.fluid.dygraph.container import Sequential
 from paddle.fluid.dygraph.nn import Conv2D
 from paddle.fluid.dygraph.nn import Pool2D
 from paddle.fluid.dygraph.nn import Linear
@@ -36,7 +34,7 @@ _logger = get_logger(
 class ImperativeLenet(nn.Layer):
    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(ImperativeLenet, self).__init__()
-        self.features = Sequential(
+        self.features = paddle.nn.Sequential(
            Conv2D(
                num_channels=1,
                num_filters=6,
@@ -54,7 +52,7 @@ class ImperativeLenet(nn.Layer):
            Pool2D(
                pool_size=2, pool_type='max', pool_stride=2))

-        self.fc = Sequential(
+        self.fc = paddle.nn.Sequential(
            Linear(
                input_dim=400, output_dim=120),
            Linear(
@@ -66,7 +64,7 @@ class ImperativeLenet(nn.Layer):
    def forward(self, inputs):
        x = self.features(inputs)

-        x = fluid.layers.flatten(x, 1)
+        x = paddle.flatten(x, 1)
        x = self.fc(x)
        return x

@@ -79,83 +77,79 @@ class TestImperativeQatDefaultConfig(unittest.TestCase):
    """

    def test_qat_acc(self):
-        with fluid.dygraph.guard():
-            lenet = ImperativeLenet()
-            quant_lenet = quant_aware(lenet)
-
-            train_reader = paddle.batch(
-                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
-            test_reader = paddle.batch(
-                paddle.dataset.mnist.test(), batch_size=32)
-
-            def train(model):
-                adam = AdamOptimizer(
-                    learning_rate=0.001, parameter_list=model.parameters())
-                epoch_num = 1
-                for epoch in range(epoch_num):
-                    model.train()
-                    for batch_id, data in enumerate(train_reader()):
-                        x_data = np.array(
-                            [x[0].reshape(1, 28, 28)
-                             for x in data]).astype('float32')
-                        y_data = np.array(
-                            [x[1] for x in data]).astype('int64').reshape(-1, 1)
-
-                        img = fluid.dygraph.to_variable(x_data)
-                        label = fluid.dygraph.to_variable(y_data)
-                        out = model(img)
-                        acc = fluid.layers.accuracy(out, label)
-                        loss = fluid.layers.cross_entropy(out, label)
-                        avg_loss = fluid.layers.mean(loss)
-                        avg_loss.backward()
-                        adam.minimize(avg_loss)
-                        model.clear_gradients()
-                        if batch_id % 100 == 0:
-                            _logger.info(
-                                "Train | At epoch {} step {}: loss = {:}, acc= {:}".
-                                format(epoch, batch_id,
-                                       avg_loss.numpy(), acc.numpy()))
-
-            def test(model):
-                model.eval()
-                avg_acc = [[], []]
-                for batch_id, data in enumerate(test_reader()):
-                    x_data = np.array([x[0].reshape(1, 28, 28)
-                                       for x in data]).astype('float32')
-                    y_data = np.array(
-                        [x[1] for x in data]).astype('int64').reshape(-1, 1)
-
-                    img = fluid.dygraph.to_variable(x_data)
-                    label = fluid.dygraph.to_variable(y_data)
-
+        lenet = ImperativeLenet()
+        quant_lenet = quant_aware(lenet)
+
+        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
+        ) else paddle.CPUPlace()
+
+        def transform(x):
+            return np.reshape(x, [1, 28, 28])
+
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', backend='cv2', transform=transform)
+        train_reader = paddle.io.DataLoader(
+            train_dataset, drop_last=True, places=place, batch_size=64)
+        val_dataset = paddle.vision.datasets.MNIST(
+            mode='test', backend='cv2', transform=transform)
+        test_reader = paddle.io.DataLoader(
+            val_dataset, places=place, batch_size=64)
+
+        def train(model):
+            adam = paddle.optimizer.Adam(
+                learning_rate=0.001, parameters=model.parameters())
+            epoch_num = 1
+            for epoch in range(epoch_num):
+                model.train()
+                for batch_id, data in enumerate(train_reader):
+                    img = paddle.to_tensor(data[0])
+                    label = paddle.to_tensor(data[1])
                    out = model(img)
-                    acc_top1 = fluid.layers.accuracy(
-                        input=out, label=label, k=1)
-                    acc_top5 = fluid.layers.accuracy(
-                        input=out, label=label, k=5)
-                    avg_acc[0].append(acc_top1.numpy())
-                    avg_acc[1].append(acc_top5.numpy())
+                    acc = paddle.metric.accuracy(out, label)
+                    loss = paddle.nn.functional.loss.cross_entropy(out, label)
+                    avg_loss = paddle.mean(loss)
+                    avg_loss.backward()
+                    adam.minimize(avg_loss)
+                    model.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
-                            "Test | step {}: acc1 = {:}, acc5 = {:}".format(
-                                batch_id, acc_top1.numpy(), acc_top5.numpy()))
-
-                _logger.info("Test |Average: acc_top1 {}, acc_top5 {}".format(
-                    np.mean(avg_acc[0]), np.mean(avg_acc[1])))
-                return np.mean(avg_acc[0]), np.mean(avg_acc[1])
-
-            train(lenet)
-            top1_1, top5_1 = test(lenet)
-
-            quant_lenet.__init__()
-            train(quant_lenet)
-            top1_2, top5_2 = test(quant_lenet)
-
-            # values before quantization and after quantization should be close
-            _logger.info("Before quantization: top1: {}, top5: {}".format(
-                top1_1, top5_1))
-            _logger.info("After quantization: top1: {}, top5: {}".format(
-                top1_2, top5_2))
+                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
+                            format(epoch, batch_id,
+                                   avg_loss.numpy(), acc.numpy()))
+
+        def test(model):
+            model.eval()
+            avg_acc = [[], []]
+            for batch_id, data in enumerate(test_reader):
+                img = paddle.to_tensor(data[0])
+                label = paddle.to_tensor(data[1])
+
+                out = model(img)
+                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
+                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
+                avg_acc[0].append(acc_top1.numpy())
+                avg_acc[1].append(acc_top5.numpy())
+                if batch_id % 100 == 0:
+                    _logger.info(
+                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
+                            batch_id, acc_top1.numpy(), acc_top5.numpy()))
+
+            _logger.info("Test |Average: acc_top1 {}, acc_top5 {}".format(
+                np.mean(avg_acc[0]), np.mean(avg_acc[1])))
+            return np.mean(avg_acc[0]), np.mean(avg_acc[1])
+
+        train(lenet)
+        top1_1, top5_1 = test(lenet)
+
+        quant_lenet.__init__()
+        train(quant_lenet)
+        top1_2, top5_2 = test(quant_lenet)
+
+        # values before quantization and after quantization should be close
+        _logger.info("Before quantization: top1: {}, top5: {}".format(top1_1,
+                                                                      top5_1))
+        _logger.info("After quantization: top1: {}, top5: {}".format(top1_2,
+                                                                     top5_2))


 class TestImperativeQatUserDefineConfig(unittest.TestCase):
@@ -165,88 +159,84 @@ class TestImperativeQatUserDefineConfig(unittest.TestCase):
    """

    def test_qat_acc(self):
-        with fluid.dygraph.guard():
-            lenet = ImperativeLenet()
-            quant_config = {
-                'weight_quantize_type': 'abs_max',
-                'activation_quantize_type': 'moving_average_abs_max',
-                'quantizable_layer_type': ['Conv2D', 'Linear']
-            }
-            quant_lenet = quant_aware(lenet, quant_config)
-
-            train_reader = paddle.batch(
-                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
-            test_reader = paddle.batch(
-                paddle.dataset.mnist.test(), batch_size=32)
-
-            def train(model):
-                adam = AdamOptimizer(
-                    learning_rate=0.001, parameter_list=model.parameters())
-                epoch_num = 1
-                for epoch in range(epoch_num):
-                    model.train()
-                    for batch_id, data in enumerate(train_reader()):
-                        x_data = np.array(
-                            [x[0].reshape(1, 28, 28)
-                             for x in data]).astype('float32')
-                        y_data = np.array(
-                            [x[1] for x in data]).astype('int64').reshape(-1, 1)
-
-                        img = fluid.dygraph.to_variable(x_data)
-                        label = fluid.dygraph.to_variable(y_data)
-                        out = model(img)
-                        acc = fluid.layers.accuracy(out, label)
-                        loss = fluid.layers.cross_entropy(out, label)
-                        avg_loss = fluid.layers.mean(loss)
-                        avg_loss.backward()
-                        adam.minimize(avg_loss)
-                        model.clear_gradients()
-                        if batch_id % 100 == 0:
-                            _logger.info(
-                                "Train | At epoch {} step {}: loss = {:}, acc= {:}".
-                                format(epoch, batch_id,
-                                       avg_loss.numpy(), acc.numpy()))
-
-            def test(model):
-                model.eval()
-                avg_acc = [[], []]
-                for batch_id, data in enumerate(test_reader()):
-                    x_data = np.array([x[0].reshape(1, 28, 28)
-                                       for x in data]).astype('float32')
-                    y_data = np.array(
-                        [x[1] for x in data]).astype('int64').reshape(-1, 1)
-
-                    img = fluid.dygraph.to_variable(x_data)
-                    label = fluid.dygraph.to_variable(y_data)
-
+        lenet = ImperativeLenet()
+        quant_config = {
+            'weight_quantize_type': 'abs_max',
+            'activation_quantize_type': 'moving_average_abs_max',
+            'quantizable_layer_type': ['Conv2D', 'Linear']
+        }
+        quant_lenet = quant_aware(lenet, quant_config)
+
+        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
+        ) else paddle.CPUPlace()
+
+        def transform(x):
+            return np.reshape(x, [1, 28, 28])
+
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', backend='cv2', transform=transform)
+        train_reader = paddle.io.DataLoader(
+            train_dataset, drop_last=True, places=place, batch_size=64)
+        val_dataset = paddle.vision.datasets.MNIST(
+            mode='test', backend='cv2', transform=transform)
+        test_reader = paddle.io.DataLoader(
+            val_dataset, places=place, batch_size=64)
+
+        def train(model):
+            adam = paddle.optimizer.Adam(
+                learning_rate=0.001, parameters=model.parameters())
+            epoch_num = 1
+            for epoch in range(epoch_num):
+                model.train()
+                for batch_id, data in enumerate(train_reader):
+                    img = paddle.to_tensor(data[0])
+                    label = paddle.to_tensor(data[1])
                    out = model(img)
-                    acc_top1 = fluid.layers.accuracy(
-                        input=out, label=label, k=1)
-                    acc_top5 = fluid.layers.accuracy(
-                        input=out, label=label, k=5)
-                    avg_acc[0].append(acc_top1.numpy())
-                    avg_acc[1].append(acc_top5.numpy())
+                    acc = paddle.metric.accuracy(out, label)
+                    loss = paddle.nn.functional.loss.cross_entropy(out, label)
+                    avg_loss = paddle.mean(loss)
+                    avg_loss.backward()
+                    adam.minimize(avg_loss)
+                    model.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
-                            "Test | step {}: acc1 = {:}, acc5 = {:}".format(
-                                batch_id, acc_top1.numpy(), acc_top5.numpy()))
-
-                _logger.info("Test |Average: acc_top1 {}, acc_top5 {}".format(
-                    np.mean(avg_acc[0]), np.mean(avg_acc[1])))
-                return np.mean(avg_acc[0]), np.mean(avg_acc[1])
-
-            train(lenet)
-            top1_1, top5_1 = test(lenet)
-
-            quant_lenet.__init__()
-            train(quant_lenet)
-            top1_2, top5_2 = test(quant_lenet)
-
-            # values before quantization and after quantization should be close
-            _logger.info("Before quantization: top1: {}, top5: {}".format(
-                top1_1, top5_1))
-            _logger.info("After quantization: top1: {}, top5: {}".format(
-                top1_2, top5_2))
+                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
+                            format(epoch, batch_id,
+                                   avg_loss.numpy(), acc.numpy()))
+
+        def test(model):
+            model.eval()
+            avg_acc = [[], []]
+            for batch_id, data in enumerate(test_reader):
+                img = paddle.to_tensor(data[0])
+                label = paddle.to_tensor(data[1])
+
+                out = model(img)
+                acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
+                acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
+                avg_acc[0].append(acc_top1.numpy())
+                avg_acc[1].append(acc_top5.numpy())
+                if batch_id % 100 == 0:
+                    _logger.info(
+                        "Test | step {}: acc1 = {:}, acc5 = {:}".format(
+                            batch_id, acc_top1.numpy(), acc_top5.numpy()))
+
+            _logger.info("Test |Average: acc_top1 {}, acc_top5 {}".format(
+                np.mean(avg_acc[0]), np.mean(avg_acc[1])))
+            return np.mean(avg_acc[0]), np.mean(avg_acc[1])
+
+        train(lenet)
+        top1_1, top5_1 = test(lenet)
+
+        quant_lenet.__init__()
+        train(quant_lenet)
+        top1_2, top5_2 = test(quant_lenet)
+
+        # Pre-/post-quantization accuracy should be close (logged only)
+        _logger.info("Before quantization: top1: {}, top5: {}".format(top1_1,
+                                                                      top5_1))
+        _logger.info("After quantization: top1: {}, top5: {}".format(top1_2,
+                                                                     top5_2))


 if __name__ == '__main__':

--- a/tests/test_quant_aware.py
+++ b/tests/test_quant_aware.py
@@ -53,7 +53,7 @@ class TestQuantAwareCase1(StaticCase):

    def test_quant_op(self):
        startup_prog, train_prog = self.get_model()
-        place = paddle.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
+        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
        ) else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(startup_prog)
@@ -107,24 +107,26 @@ class TestQuantAwareCase2(StaticCase):
        main_prog = paddle.static.default_main_program()
        val_prog = main_prog.clone(for_test=True)

-        place = paddle.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
+        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
        ) else paddle.static.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
-        train_loader = paddle.io.DataLoader.from_generator(
-            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)
-        valid_loader = paddle.io.DataLoader.from_generator(
+
+        def transform(x):
+            return np.reshape(x, [1, 28, 28])
+
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', backend='cv2', transform=transform)
+        test_dataset = paddle.vision.datasets.MNIST(
+            mode='test', backend='cv2', transform=transform)
+        train_loader = paddle.io.DataLoader(
+            train_dataset,
+            places=place,
            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)
-        train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=64)
-        eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)
-        train_loader.set_sample_list_generator(train_reader, place)
-        valid_loader.set_sample_list_generator(eval_reader, place)
+            drop_last=True,
+            batch_size=64)
+        valid_loader = paddle.io.DataLoader(
+            test_dataset, places=place, feed_list=[image, label], batch_size=64)

        def train(program):
            iter = 0

--- a/tests/test_quant_aware_user_defined.py
+++ b/tests/test_quant_aware_user_defined.py
@@ -79,27 +79,26 @@ class TestQuantAwareCase1(StaticCase):
        main_prog = paddle.static.default_main_program()
        val_prog = main_prog.clone(for_test=True)

-        train_loader = paddle.io.DataLoader.from_generator(
-            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)
-        valid_loader = paddle.io.DataLoader.from_generator(
-            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)
-
-        place = paddle.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
+        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
        ) else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())

-        train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=64)
-        eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)
+        def transform(x):
+            return np.reshape(x, [1, 28, 28])

-        train_loader.set_sample_list_generator(train_reader, place)
-        valid_loader.set_sample_list_generator(eval_reader, place)
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', backend='cv2', transform=transform)
+        test_dataset = paddle.vision.datasets.MNIST(
+            mode='test', backend='cv2', transform=transform)
+        train_loader = paddle.io.DataLoader(
+            train_dataset,
+            places=place,
+            feed_list=[image, label],
+            drop_last=True,
+            batch_size=64)
+        valid_loader = paddle.io.DataLoader(
+            test_dataset, places=place, feed_list=[image, label], batch_size=64)

        def train(program):
            iter = 0

--- a/tests/test_quant_post.py
+++ b/tests/test_quant_post.py
@@ -43,24 +43,26 @@ class TestQuantAwareCase1(StaticCase):
        main_prog = paddle.static.default_main_program()
        val_prog = main_prog.clone(for_test=True)

-        place = paddle.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
+        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
        ) else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
-        train_loader = paddle.io.DataLoader.from_generator(
-            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)
-        valid_loader = paddle.io.DataLoader.from_generator(
+
+        def transform(x):
+            return np.reshape(x, [1, 28, 28])
+
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', backend='cv2', transform=transform)
+        test_dataset = paddle.vision.datasets.MNIST(
+            mode='test', backend='cv2', transform=transform)
+        train_loader = paddle.io.DataLoader(
+            train_dataset,
+            places=place,
            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)
-        train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=64)
-        eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)
-        train_loader.set_sample_list_generator(train_reader, place)
-        valid_loader.set_sample_list_generator(eval_reader, place)
+            drop_last=True,
+            batch_size=64)
+        valid_loader = paddle.io.DataLoader(
+            test_dataset, places=place, feed_list=[image, label], batch_size=64)

        def train(program):
            iter = 0

--- a/tests/test_quant_post_only_weight.py
+++ b/tests/test_quant_post_only_weight.py
@@ -43,26 +43,26 @@ class TestQuantPostOnlyWeightCase1(StaticCase):
        main_prog = paddle.static.default_main_program()
        val_prog = main_prog.clone(for_test=True)

-        place = paddle.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda(
+        place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda(
        ) else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        exe.run(paddle.static.default_startup_program())
-        train_loader = paddle.io.DataLoader.from_generator(
-            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)
-        valid_loader = paddle.io.DataLoader.from_generator(
-            feed_list=[image, label],
-            capacity=512,
-            use_double_buffer=True,
-            iterable=True)

-        train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=64)
-        eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)
+        def transform(x):
+            return np.reshape(x, [1, 28, 28])

-        train_loader.set_sample_list_generator(train_reader, place)
-        valid_loader.set_sample_list_generator(eval_reader, place)
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', backend='cv2', transform=transform)
+        test_dataset = paddle.vision.datasets.MNIST(
+            mode='test', backend='cv2', transform=transform)
+        train_loader = paddle.io.DataLoader(
+            train_dataset,
+            places=place,
+            feed_list=[image, label],
+            drop_last=True,
+            batch_size=64)
+        valid_loader = paddle.io.DataLoader(
+            test_dataset, places=place, feed_list=[image, label], batch_size=64)

        def train(program):
            iter = 0