diff --git a/demo/nas/block_sa_nas_mobilenetv2.py b/demo/nas/block_sa_nas_mobilenetv2.py
index 5c6c8e52fcf573b2d53ec990dbeede06c2ab1e36..9fc9f9f50f221ef647bd4373fdd112c54b785846 100644
--- a/demo/nas/block_sa_nas_mobilenetv2.py
+++ b/demo/nas/block_sa_nas_mobilenetv2.py
@@ -6,8 +6,10 @@ import ast
 import logging
 import time
 import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+import paddle.static as static
+from paddle import ParamAttr
 from paddleslim.analysis import flops
 from paddleslim.nas import SANAS
 from paddleslim.common import get_logger
@@ -17,18 +19,6 @@ import imagenet_reader
 _logger = get_logger(__name__, level=logging.INFO)
 
 
-def create_data_loader(image_shape):
-    data_shape = [None] + image_shape
-    data = fluid.data(name='data', shape=data_shape, dtype='float32')
-    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
-    data_loader = fluid.io.DataLoader.from_generator(
-        feed_list=[data, label],
-        capacity=1024,
-        use_double_buffer=True,
-        iterable=True)
-    return data_loader, data, label
-
-
 def conv_bn_layer(input,
                   filter_size,
                   num_filters,
@@ -38,7 +28,7 @@ def conv_bn_layer(input,
                   act=None,
                   name=None,
                   use_cudnn=True):
-    conv = fluid.layers.conv2d(
+    conv = static.nn.conv2d(
         input,
         num_filters=num_filters,
         filter_size=filter_size,
@@ -50,7 +40,7 @@ def conv_bn_layer(input,
         param_attr=ParamAttr(name=name + '_weights'),
         bias_attr=False)
     bn_name = name + '_bn'
-    return fluid.layers.batch_norm(
+    return static.nn.batch_norm(
         input=conv,
         act=act,
         param_attr=ParamAttr(name=bn_name + '_scale'),
@@ -61,6 +51,16 @@ def conv_bn_layer(input,
 
 def search_mobilenetv2_block(config, args, image_size):
     image_shape = [3, image_size, image_size]
+    if args.data == 'cifar10':
+        train_dataset = paddle.vision.datasets.Cifar10(mode='train')
+        val_dataset = paddle.vision.datasets.Cifar10(mode='test')
+
+    elif args.data == 'imagenet':
+        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
+        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
+
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
+    place = places[0]
     if args.is_server:
         sa_nas = SANAS(
             config,
@@ -77,11 +77,33 @@ def search_mobilenetv2_block(config, args, image_size):
     for step in range(args.search_steps):
         archs = sa_nas.next_archs()[0]
 
-        train_program = fluid.Program()
-        test_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(train_program, startup_program):
-            train_loader, data, label = create_data_loader(image_shape)
+        train_program = static.Program()
+        test_program = static.Program()
+        startup_program = static.Program()
+        with static.program_guard(train_program, startup_program):
+            data_shape = [None] + image_shape
+            data = static.data(name='data', shape=data_shape, dtype='float32')
+            label = static.data(name='label', shape=[None, 1], dtype='int64')
+            if args.data == 'cifar10':
+                paddle.assign(paddle.reshape(label, [-1, 1]), label)
+            train_loader = paddle.io.DataLoader(
+                train_dataset,
+                places=places,
+                feed_list=[data, label],
+                drop_last=True,
+                batch_size=args.batch_size,
+                return_list=False,
+                shuffle=True,
+                use_shared_memory=True,
+                num_workers=4)
+            val_loader = paddle.io.DataLoader(
+                val_dataset,
+                places=place,
+                feed_list=[data, label],
+                drop_last=False,
+                batch_size=args.batch_size,
+                return_list=False,
+                shuffle=False)
             data = conv_bn_layer(
                 input=data,
                 num_filters=32,
@@ -99,32 +121,27 @@ def search_mobilenetv2_block(config, args, image_size):
                 padding='SAME',
                 act='relu6',
                 name='mobilenetv2_last_conv')
-            data = fluid.layers.pool2d(
-                input=data,
-                pool_size=7,
-                pool_stride=1,
-                pool_type='avg',
-                global_pooling=True,
-                name='mobilenetv2_last_pool')
-            output = fluid.layers.fc(
-                input=data,
+            data = F.adaptive_avg_pool2d(
+                data, output_size=[1, 1], name='mobilenetv2_last_pool')
+            output = static.nn.fc(
+                x=data,
                 size=args.class_dim,
-                param_attr=ParamAttr(name='mobilenetv2_fc_weights'),
+                weight_attr=ParamAttr(name='mobilenetv2_fc_weights'),
                 bias_attr=ParamAttr(name='mobilenetv2_fc_offset'))
 
-            softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
-            cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
-            avg_cost = fluid.layers.mean(cost)
-            acc_top1 = fluid.layers.accuracy(
+            softmax_out = F.softmax(output)
+            cost = F.cross_entropy(output, label=label)
+            avg_cost = paddle.mean(cost)
+            acc_top1 = paddle.metric.accuracy(
                 input=softmax_out, label=label, k=1)
-            acc_top5 = fluid.layers.accuracy(
+            acc_top5 = paddle.metric.accuracy(
                 input=softmax_out, label=label, k=5)
             test_program = train_program.clone(for_test=True)
 
-            optimizer = fluid.optimizer.Momentum(
+            optimizer = paddle.optimizer.Momentum(
                 learning_rate=0.1,
                 momentum=0.9,
-                regularization=fluid.regularizer.L2Decay(1e-4))
+                weight_decay=paddle.regularizer.L2Decay(1e-4))
             optimizer.minimize(avg_cost)
 
         current_flops = flops(train_program)
@@ -132,39 +149,11 @@ def search_mobilenetv2_block(config, args, image_size):
         if current_flops > int(321208544):
             continue
 
-        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
-        exe = fluid.Executor(place)
+        exe = static.Executor(place)
         exe.run(startup_program)
 
-        if args.data == 'cifar10':
-            train_reader = paddle.fluid.io.batch(
-                paddle.reader.shuffle(
-                    paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
-                batch_size=args.batch_size,
-                drop_last=True)
-
-            test_reader = paddle.fluid.io.batch(
-                paddle.dataset.cifar.test10(cycle=False),
-                batch_size=args.batch_size,
-                drop_last=False)
-        elif args.data == 'imagenet':
-            train_reader = paddle.fluid.io.batch(
-                imagenet_reader.train(),
-                batch_size=args.batch_size,
-                drop_last=True)
-            test_reader = paddle.fluid.io.batch(
-                imagenet_reader.val(),
-                batch_size=args.batch_size,
-                drop_last=False)
-
-        test_loader, _, _ = create_data_loader(image_shape)
-        train_loader.set_sample_list_generator(
-            train_reader,
-            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
-        test_loader.set_sample_list_generator(test_reader, places=place)
-
-        build_strategy = fluid.BuildStrategy()
-        train_compiled_program = fluid.CompiledProgram(
+        build_strategy = static.BuildStrategy()
+        train_compiled_program = static.CompiledProgram(
             train_program).with_data_parallel(
                 loss_name=avg_cost.name, build_strategy=build_strategy)
         for epoch_id in range(args.retain_epoch):
@@ -181,7 +170,7 @@ def search_mobilenetv2_block(config, args, image_size):
                         format(step, epoch_id, batch_id, outs[0], batch_time))
 
         reward = []
-        for batch_id, data in enumerate(test_loader()):
+        for batch_id, data in enumerate(val_loader):
             test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name]
             batch_reward = exe.run(test_program,
                                    feed=data,
diff --git a/demo/nas/image_classification_nas_quick_start.ipynb b/demo/nas/image_classification_nas_quick_start.ipynb
deleted file mode 100644
index 72f04cf848b8caec65b65b177c99d15ebfa05cc6..0000000000000000000000000000000000000000
--- a/demo/nas/image_classification_nas_quick_start.ipynb
+++ /dev/null
@@ -1,163 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# 图像分类网络结构搜索-快速开始\n",
-    "\n",
-    "该教程以图像分类模型MobileNetV2为例,说明如何在cifar10数据集上快速使用[网络结构搜索接口](../api/nas_api.md)。\n",
-    "该示例包含以下步骤:\n",
-    "\n",
-    "1. 导入依赖\n",
-    "2. 初始化SANAS搜索实例\n",
-    "3. 构建网络\n",
-    "4. 启动搜索实验\n",
-    "\n",
-    "以下章节依次介绍每个步骤的内容。"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 1. 导入依赖\n",
-    "请确认已正确安装Paddle,导入需要的依赖包。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import paddle\n",
-    "import paddle.fluid as fluid\n",
-    "import paddleslim as slim\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 2. 初始化SANAS搜索实例"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sanas = slim.nas.SANAS(configs=[('MobileNetV2Space')], server_addr=(\"\", 8337))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 3. 构建网络\n",
-    "根据传入的网络结构构造训练program和测试program。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def build_program(archs):\n",
-    "    train_program = fluid.Program()\n",
-    "    startup_program = fluid.Program()\n",
-    "    with fluid.program_guard(train_program, startup_program):\n",
-    "        data = fluid.data(name='data', shape=[None, 3, 32, 32], dtype='float32')\n",
-    "        label = fluid.data(name='label', shape=[None, 1], dtype='int64')\n",
-    "        output = archs(data)\n",
-    "        output = fluid.layers.fc(input=output, size=10)\n",
-    "\n",
-    "        softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)\n",
-    "        cost = fluid.layers.cross_entropy(input=softmax_out, label=label)\n",
-    "        avg_cost = fluid.layers.mean(cost)\n",
-    "        acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)\n",
-    "        acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)\n",
-    "        test_program = fluid.default_main_program().clone(for_test=True)\n",
-    "            \n",
-    "        optimizer = fluid.optimizer.Adam(learning_rate=0.1)\n",
-    "        optimizer.minimize(avg_cost)\n",
-    "\n",
-    "        place = fluid.CPUPlace()\n",
-    "        exe = fluid.Executor(place)\n",
-    "        exe.run(startup_program)\n",
-    "    return exe, train_program, test_program, (data, label), avg_cost, acc_top1, acc_top5"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## 4. 启动搜索实验\n",
-    "获取每一轮的模型结构并开始训练。该教程中使用FLOPs作为约束条件,搜索实验一共搜索3个step,表示搜索到3个满足条件的模型结构进行训练,每搜索到一个网络结构训练7个epoch。"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for step in range(3):\n",
-    "    archs = sanas.next_archs()[0]\n",
-    "    exe, train_program, test_progarm, inputs, avg_cost, acc_top1, acc_top5 = build_program(archs)\n",
-    "\n",
-    "    current_flops = slim.analysis.flops(train_program)\n",
-    "    if current_flops > 321208544:\n",
-    "        continue\n",
-    "    \n",
-    "    train_reader = paddle.fluid.io.batch(paddle.reader.shuffle(paddle.dataset.cifar.train10(cycle=False),                          buf_size=1024),batch_size=256)\n",
-    "    train_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())\n",
-    "    test_reader = paddle.fluid.io.batch(paddle.dataset.cifar.test10(cycle=False),\n",
-    "               batch_size=256)\n",
-    "    test_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())\n",
-    "\n",
-    "    outputs = [avg_cost.name, acc_top1.name, acc_top5.name]\n",
-    "    for epoch in range(7):\n",
-    "        for data in train_reader():\n",
-    "            loss, acc1, acc5 = exe.run(train_program, feed=train_feeder.feed(data), fetch_list = outputs)\n",
-    "            print(\"TRAIN: loss: {}, acc1: {}, acc5:{}\".format(loss, acc1, acc5))\n",
-    "\n",
-    "    reward = []\n",
-    "    for data in test_reader():\n",
-    "        batch_reward = exe.run(test_program, feed=test_feeder.feed(data), fetch_list = outputs)\n",
-    "        reward_avg = np.mean(np.array(batch_reward), axis=1)\n",
-    "        reward.append(reward_avg)\n",
-    "        print(\"TEST: loss: {}, acc1: {}, acc5:{}\".format(batch_reward[0], batch_reward[1], batch_reward[2]))\n",
-    "    finally_reward = np.mean(np.array(reward), axis=0)\n",
-    "    print(\"FINAL TEST: avg_cost: {}, acc1: {}, acc5: {}\".format(finally_reward[0], finally_reward[1], finally_reward[2]))\n",
-    "\n",
-    "    sanas.reward(float(finally_reward[1]))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.12"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/demo/nas/parl_nas_mobilenetv2.py b/demo/nas/parl_nas_mobilenetv2.py
index 6b8bf154502429da38ebd02c344df831722a187f..732c8f28dc6aef4e8848eeb9c0d2e0e1b02d7674 100644
--- a/demo/nas/parl_nas_mobilenetv2.py
+++ b/demo/nas/parl_nas_mobilenetv2.py
@@ -8,8 +8,9 @@ import argparse
 import ast
 import logging
 import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
+import paddle.nn as nn
+import paddle.static as static
+import paddle.nn.functional as F
 from paddleslim.nas import RLNAS
 from paddleslim.common import get_logger
 from optimizer import create_optimizer
@@ -18,36 +19,50 @@ import imagenet_reader
 _logger = get_logger(__name__, level=logging.INFO)
 
 
-def create_data_loader(image_shape):
-    data_shape = [None] + image_shape
-    data = fluid.data(name='data', shape=data_shape, dtype='float32')
-    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
-    data_loader = fluid.io.DataLoader.from_generator(
-        feed_list=[data, label],
-        capacity=1024,
-        use_double_buffer=True,
-        iterable=True)
-    return data_loader, data, label
-
-
 def build_program(main_program,
                   startup_program,
                   image_shape,
+                  dataset,
                   archs,
                   args,
+                  places,
                   is_test=False):
-    with fluid.program_guard(main_program, startup_program):
-        with fluid.unique_name.guard():
-            data_loader, data, label = create_data_loader(image_shape)
+    with static.program_guard(main_program, startup_program):
+        with paddle.utils.unique_name.guard():
+            data_shape = [None] + image_shape
+            data = static.data(name='data', shape=data_shape, dtype='float32')
+            label = static.data(name='label', shape=[None, 1], dtype='int64')
+            if args.data == 'cifar10':
+                paddle.assign(paddle.reshape(label, [-1, 1]), label)
+            if is_test:
+                data_loader = paddle.io.DataLoader(
+                    dataset,
+                    places=places,
+                    feed_list=[data, label],
+                    drop_last=False,
+                    batch_size=args.batch_size,
+                    return_list=False,
+                    shuffle=False)
+            else:
+                data_loader = paddle.io.DataLoader(
+                    dataset,
+                    places=places,
+                    feed_list=[data, label],
+                    drop_last=True,
+                    batch_size=args.batch_size,
+                    return_list=False,
+                    shuffle=True,
+                    use_shared_memory=True,
+                    num_workers=4)
             output = archs(data)
-            output = fluid.layers.fc(input=output, size=args.class_dim)
+            output = static.nn.fc(output, size=args.class_dim)
 
-            softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
-            cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
-            avg_cost = fluid.layers.mean(cost)
-            acc_top1 = fluid.layers.accuracy(
+            softmax_out = F.softmax(output)
+            cost = F.cross_entropy(output, label=label)
+            avg_cost = paddle.mean(cost)
+            acc_top1 = paddle.metric.accuracy(
                 input=softmax_out, label=label, k=1)
-            acc_top5 = fluid.layers.accuracy(
+            acc_top5 = paddle.metric.accuracy(
                 input=softmax_out, label=label, k=5)
 
             if is_test == False:
@@ -57,6 +72,8 @@ def build_program(main_program,
 
 
 def search_mobilenetv2(config, args, image_size, is_server=True):
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
+    place = places[0]
     if is_server:
         ### start a server and a client
         rl_nas = RLNAS(
@@ -76,6 +93,14 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
             is_server=False)
 
     image_shape = [3, image_size, image_size]
+    if args.data == 'cifar10':
+        train_dataset = paddle.vision.datasets.Cifar10(mode='train')
+        val_dataset = paddle.vision.datasets.Cifar10(mode='test')
+
+    elif args.data == 'imagenet':
+        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
+        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
+
     for step in range(args.search_steps):
         if step == 0:
             action_prev = [1. for _ in rl_nas.range_tables]
@@ -85,53 +110,29 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
         obs.extend(action_prev)
         archs = rl_nas.next_archs(obs=obs)[0][0]
 
-        train_program = fluid.Program()
-        test_program = fluid.Program()
-        startup_program = fluid.Program()
+        train_program = static.Program()
+        test_program = static.Program()
+        startup_program = static.Program()
         train_loader, avg_cost, acc_top1, acc_top5 = build_program(
-            train_program, startup_program, image_shape, archs, args)
+            train_program, startup_program, image_shape, train_dataset, archs,
+            args, places)
 
         test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
             test_program,
             startup_program,
             image_shape,
+            val_dataset,
             archs,
             args,
+            place,
             is_test=True)
         test_program = test_program.clone(for_test=True)
 
-        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
-        exe = fluid.Executor(place)
+        exe = static.Executor(place)
         exe.run(startup_program)
 
-        if args.data == 'cifar10':
-            train_reader = paddle.fluid.io.batch(
-                paddle.reader.shuffle(
-                    paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
-                batch_size=args.batch_size,
-                drop_last=True)
-
-            test_reader = paddle.fluid.io.batch(
-                paddle.dataset.cifar.test10(cycle=False),
-                batch_size=args.batch_size,
-                drop_last=False)
-        elif args.data == 'imagenet':
-            train_reader = paddle.fluid.io.batch(
-                imagenet_reader.train(),
-                batch_size=args.batch_size,
-                drop_last=True)
-            test_reader = paddle.fluid.io.batch(
-                imagenet_reader.val(),
-                batch_size=args.batch_size,
-                drop_last=False)
-
-        train_loader.set_sample_list_generator(
-            train_reader,
-            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
-        test_loader.set_sample_list_generator(test_reader, places=place)
-
-        build_strategy = fluid.BuildStrategy()
-        train_compiled_program = fluid.CompiledProgram(
+        build_strategy = static.BuildStrategy()
+        train_compiled_program = static.CompiledProgram(
             train_program).with_data_parallel(
                 loss_name=avg_cost.name, build_strategy=build_strategy)
         for epoch_id in range(args.retain_epoch):
diff --git a/demo/nas/rl_nas_mobilenetv2.py b/demo/nas/rl_nas_mobilenetv2.py
index ee86c7591cef8bf743f2ff6b8397243380fbee4d..27445d4d742883e1ff01ac825e62d726bf104150 100644
--- a/demo/nas/rl_nas_mobilenetv2.py
+++ b/demo/nas/rl_nas_mobilenetv2.py
@@ -8,8 +8,9 @@ import argparse
 import ast
 import logging
 import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
+import paddle.nn as nn
+import paddle.static as static
+import paddle.nn.functional as F
 from paddleslim.nas import RLNAS
 from paddleslim.common import get_logger
 from optimizer import create_optimizer
@@ -18,36 +19,50 @@ import imagenet_reader
 _logger = get_logger(__name__, level=logging.INFO)
 
 
-def create_data_loader(image_shape):
-    data_shape = [None] + image_shape
-    data = fluid.data(name='data', shape=data_shape, dtype='float32')
-    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
-    data_loader = fluid.io.DataLoader.from_generator(
-        feed_list=[data, label],
-        capacity=1024,
-        use_double_buffer=True,
-        iterable=True)
-    return data_loader, data, label
-
-
 def build_program(main_program,
                   startup_program,
                   image_shape,
+                  dataset,
                   archs,
                   args,
+                  places,
                   is_test=False):
-    with fluid.program_guard(main_program, startup_program):
-        with fluid.unique_name.guard():
-            data_loader, data, label = create_data_loader(image_shape)
+    with static.program_guard(main_program, startup_program):
+        with paddle.utils.unique_name.guard():
+            data_shape = [None] + image_shape
+            data = static.data(name='data', shape=data_shape, dtype='float32')
+            label = static.data(name='label', shape=[None, 1], dtype='int64')
+            if args.data == 'cifar10':
+                paddle.assign(paddle.reshape(label, [-1, 1]), label)
+            if is_test:
+                data_loader = paddle.io.DataLoader(
+                    dataset,
+                    places=places,
+                    feed_list=[data, label],
+                    drop_last=False,
+                    batch_size=args.batch_size,
+                    return_list=False,
+                    shuffle=False)
+            else:
+                data_loader = paddle.io.DataLoader(
+                    dataset,
+                    places=places,
+                    feed_list=[data, label],
+                    drop_last=True,
+                    batch_size=args.batch_size,
+                    return_list=False,
+                    shuffle=True,
+                    use_shared_memory=True,
+                    num_workers=4)
             output = archs(data)
-            output = fluid.layers.fc(input=output, size=args.class_dim)
+            output = static.nn.fc(output, size=args.class_dim)
 
-            softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
-            cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
-            avg_cost = fluid.layers.mean(cost)
-            acc_top1 = fluid.layers.accuracy(
+            softmax_out = F.softmax(output)
+            cost = F.cross_entropy(output, label=label)
+            avg_cost = paddle.mean(cost)
+            acc_top1 = paddle.metric.accuracy(
                 input=softmax_out, label=label, k=1)
-            acc_top5 = fluid.layers.accuracy(
+            acc_top5 = paddle.metric.accuracy(
                 input=softmax_out, label=label, k=5)
 
             if is_test == False:
@@ -57,6 +72,8 @@ def build_program(main_program,
 
 
 def search_mobilenetv2(config, args, image_size, is_server=True):
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
+    place = places[0]
     if is_server:
         ### start a server and a client
         rl_nas = RLNAS(
@@ -86,56 +103,40 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
             is_server=False)
 
     image_shape = [3, image_size, image_size]
+    if args.data == 'cifar10':
+        train_dataset = paddle.vision.datasets.Cifar10(mode='train')
+        val_dataset = paddle.vision.datasets.Cifar10(mode='test')
+
+    elif args.data == 'imagenet':
+        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
+        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
+
     for step in range(args.search_steps):
         archs = rl_nas.next_archs(1)[0][0]
 
-        train_program = fluid.Program()
-        test_program = fluid.Program()
-        startup_program = fluid.Program()
+        train_program = static.Program()
+        test_program = static.Program()
+        startup_program = static.Program()
         train_loader, avg_cost, acc_top1, acc_top5 = build_program(
-            train_program, startup_program, image_shape, archs, args)
+            train_program, startup_program, image_shape, train_dataset, archs,
+            args, places)
 
         test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
             test_program,
             startup_program,
             image_shape,
+            val_dataset,
             archs,
             args,
+            place,
             is_test=True)
         test_program = test_program.clone(for_test=True)
 
-        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
-        exe = fluid.Executor(place)
+        exe = static.Executor(place)
         exe.run(startup_program)
 
-        if args.data == 'cifar10':
-            train_reader = paddle.fluid.io.batch(
-                paddle.reader.shuffle(
-                    paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
-                batch_size=args.batch_size,
-                drop_last=True)
-
-            test_reader = paddle.fluid.io.batch(
-                paddle.dataset.cifar.test10(cycle=False),
-                batch_size=args.batch_size,
-                drop_last=False)
-        elif args.data == 'imagenet':
-            train_reader = paddle.fluid.io.batch(
-                imagenet_reader.train(),
-                batch_size=args.batch_size,
-                drop_last=True)
-            test_reader = paddle.fluid.io.batch(
-                imagenet_reader.val(),
-                batch_size=args.batch_size,
-                drop_last=False)
-
-        train_loader.set_sample_list_generator(
-            train_reader,
-            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
-        test_loader.set_sample_list_generator(test_reader, places=place)
-
-        build_strategy = fluid.BuildStrategy()
-        train_compiled_program = fluid.CompiledProgram(
+        build_strategy = static.BuildStrategy()
+        train_compiled_program = static.CompiledProgram(
             train_program).with_data_parallel(
                 loss_name=avg_cost.name, build_strategy=build_strategy)
         for epoch_id in range(args.retain_epoch):
diff --git a/demo/nas/sa_nas_mobilenetv2.py b/demo/nas/sa_nas_mobilenetv2.py
index b12be1899929f0f9c9aa7aec9043d4269ec9408f..64e4748f484d8449da33ecff5b2417c0c077eba5 100644
--- a/demo/nas/sa_nas_mobilenetv2.py
+++ b/demo/nas/sa_nas_mobilenetv2.py
@@ -8,8 +8,10 @@ import argparse
 import ast
 import logging
 import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
+import paddle.nn as nn
+import paddle.static as static
+import paddle.nn.functional as F
+from paddle import ParamAttr
 from paddleslim.analysis import flops
 from paddleslim.nas import SANAS
 from paddleslim.common import get_logger
@@ -19,36 +21,50 @@ import imagenet_reader
 _logger = get_logger(__name__, level=logging.INFO)
 
 
-def create_data_loader(image_shape):
-    data_shape = [None] + image_shape
-    data = fluid.data(name='data', shape=data_shape, dtype='float32')
-    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
-    data_loader = fluid.io.DataLoader.from_generator(
-        feed_list=[data, label],
-        capacity=1024,
-        use_double_buffer=True,
-        iterable=True)
-    return data_loader, data, label
-
-
 def build_program(main_program,
                   startup_program,
                   image_shape,
+                  dataset,
                   archs,
                   args,
+                  places,
                   is_test=False):
-    with fluid.program_guard(main_program, startup_program):
-        with fluid.unique_name.guard():
-            data_loader, data, label = create_data_loader(image_shape)
+    with static.program_guard(main_program, startup_program):
+        with paddle.utils.unique_name.guard():
+            data_shape = [None] + image_shape
+            data = static.data(name='data', shape=data_shape, dtype='float32')
+            label = static.data(name='label', shape=[None, 1], dtype='int64')
+            if args.data == 'cifar10':
+                paddle.assign(paddle.reshape(label, [-1, 1]), label)
+            if is_test:
+                data_loader = paddle.io.DataLoader(
+                    dataset,
+                    places=places,
+                    feed_list=[data, label],
+                    drop_last=False,
+                    batch_size=args.batch_size,
+                    return_list=False,
+                    shuffle=False)
+            else:
+                data_loader = paddle.io.DataLoader(
+                    dataset,
+                    places=places,
+                    feed_list=[data, label],
+                    drop_last=True,
+                    batch_size=args.batch_size,
+                    return_list=False,
+                    shuffle=True,
+                    use_shared_memory=True,
+                    num_workers=4)
             output = archs(data)
-            output = fluid.layers.fc(input=output, size=args.class_dim)
+            output = static.nn.fc(x=output, size=args.class_dim)
 
-            softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
-            cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
-            avg_cost = fluid.layers.mean(cost)
-            acc_top1 = fluid.layers.accuracy(
+            softmax_out = F.softmax(output)
+            cost = F.cross_entropy(output, label=label)
+            avg_cost = paddle.mean(cost)
+            acc_top1 = paddle.metric.accuracy(
                 input=softmax_out, label=label, k=1)
-            acc_top5 = fluid.layers.accuracy(
+            acc_top5 = paddle.metric.accuracy(
                 input=softmax_out, label=label, k=5)
 
             if is_test == False:
@@ -58,6 +74,17 @@ def build_program(main_program,
 
 
 def search_mobilenetv2(config, args, image_size, is_server=True):
+    image_shape = [3, image_size, image_size]
+    if args.data == 'cifar10':
+        train_dataset = paddle.vision.datasets.Cifar10(mode='train')
+        val_dataset = paddle.vision.datasets.Cifar10(mode='test')
+
+    elif args.data == 'imagenet':
+        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
+        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
+
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
+    place = places[0]
     if is_server:
         ### start a server and a client
         sa_nas = SANAS(
@@ -73,15 +100,15 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
             search_steps=args.search_steps,
             is_server=False)
 
-    image_shape = [3, image_size, image_size]
     for step in range(args.search_steps):
         archs = sa_nas.next_archs()[0]
 
-        train_program = fluid.Program()
-        test_program = fluid.Program()
-        startup_program = fluid.Program()
+        train_program = static.Program()
+        test_program = static.Program()
+        startup_program = static.Program()
         train_loader, avg_cost, acc_top1, acc_top5 = build_program(
-            train_program, startup_program, image_shape, archs, args)
+            train_program, startup_program, image_shape, train_dataset, archs,
+            args, places)
 
         current_flops = flops(train_program)
         print('step: {}, current_flops: {}'.format(step, current_flops))
@@ -92,43 +119,18 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
             test_program,
             startup_program,
             image_shape,
+            val_dataset,
             archs,
             args,
+            place,
             is_test=True)
         test_program = test_program.clone(for_test=True)
 
-        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
-        exe = fluid.Executor(place)
+        exe = static.Executor(place)
         exe.run(startup_program)
 
-        if args.data == 'cifar10':
-            train_reader = paddle.fluid.io.batch(
-                paddle.reader.shuffle(
-                    paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
-                batch_size=args.batch_size,
-                drop_last=True)
-
-            test_reader = paddle.fluid.io.batch(
-                paddle.dataset.cifar.test10(cycle=False),
-                batch_size=args.batch_size,
-                drop_last=False)
-        elif args.data == 'imagenet':
-            train_reader = paddle.fluid.io.batch(
-                imagenet_reader.train(),
-                batch_size=args.batch_size,
-                drop_last=True)
-            test_reader = paddle.fluid.io.batch(
-                imagenet_reader.val(),
-                batch_size=args.batch_size,
-                drop_last=False)
-
-        train_loader.set_sample_list_generator(
-            train_reader,
-            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
-        test_loader.set_sample_list_generator(test_reader, places=place)
-
-        build_strategy = fluid.BuildStrategy()
-        train_compiled_program = fluid.CompiledProgram(
+        build_strategy = static.BuildStrategy()
+        train_compiled_program = static.CompiledProgram(
             train_program).with_data_parallel(
                 loss_name=avg_cost.name, build_strategy=build_strategy)
         for epoch_id in range(args.retain_epoch):
@@ -169,6 +171,9 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
 
 
 def test_search_result(tokens, image_size, args, config):
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
+    place = places[0]
+
     sa_nas = SANAS(
         config,
         server_addr=(args.server_address, args.port),
@@ -176,50 +181,42 @@ def test_search_result(tokens, image_size, args, config):
         is_server=True)
 
     image_shape = [3, image_size, image_size]
+    if args.data == 'cifar10':
+        train_dataset = paddle.vision.datasets.Cifar10(mode='train')
+        val_dataset = paddle.vision.datasets.Cifar10(mode='test')
+
+    elif args.data == 'imagenet':
+        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
+        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
 
     archs = sa_nas.tokens2arch(tokens)[0]
 
-    train_program = fluid.Program()
-    test_program = fluid.Program()
-    startup_program = fluid.Program()
+    train_program = static.Program()
+    test_program = static.Program()
+    startup_program = static.Program()
     train_loader, avg_cost, acc_top1, acc_top5 = build_program(
-        train_program, startup_program, image_shape, archs, args)
+        train_program, startup_program, image_shape, train_dataset, archs, args,
+        places)
 
     current_flops = flops(train_program)
     print('current_flops: {}'.format(current_flops))
     test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
-        test_program, startup_program, image_shape, archs, args, is_test=True)
+        test_program,
+        startup_program,
+        image_shape,
+        val_dataset,
+        archs,
+        args,
+        place,
+        is_test=True)
 
     test_program = test_program.clone(for_test=True)
 
-    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
-    exe = fluid.Executor(place)
+    exe = static.Executor(place)
     exe.run(startup_program)
 
-    if args.data == 'cifar10':
-        train_reader = paddle.fluid.io.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
-            batch_size=args.batch_size,
-            drop_last=True)
-
-        test_reader = paddle.fluid.io.batch(
-            paddle.dataset.cifar.test10(cycle=False),
-            batch_size=args.batch_size,
-            drop_last=False)
-    elif args.data == 'imagenet':
-        train_reader = paddle.fluid.io.batch(
-            imagenet_reader.train(), batch_size=args.batch_size, drop_last=True)
-        test_reader = paddle.fluid.io.batch(
-            imagenet_reader.val(), batch_size=args.batch_size, drop_last=False)
-
-    train_loader.set_sample_list_generator(
-        train_reader,
-        places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
-    test_loader.set_sample_list_generator(test_reader, places=place)
-
-    build_strategy = fluid.BuildStrategy()
-    train_compiled_program = fluid.CompiledProgram(
+    build_strategy = static.BuildStrategy()
+    train_compiled_program = static.CompiledProgram(
         train_program).with_data_parallel(
             loss_name=avg_cost.name, build_strategy=build_strategy)
     for epoch_id in range(args.retain_epoch):
diff --git a/demo/nas/sanas_darts_space.py b/demo/nas/sanas_darts_space.py
index 43705e8781ab2875e55f7f0b3df12a6123a0f475..6410f3052861bbfeacbc1556e8f27c5cfade5f34 100644
--- a/demo/nas/sanas_darts_space.py
+++ b/demo/nas/sanas_darts_space.py
@@ -8,7 +8,10 @@ import time
 import argparse
 import ast
 import logging
-import paddle.fluid as fluid
+import paddle
+import paddle.nn.functional as F
+import paddle.nn as nn
+import paddle.static as static
 from paddleslim.nas import SANAS
 from paddleslim.common import get_logger
 import darts_cifar10_reader as reader
@@ -49,10 +52,10 @@ def count_parameters_in_MB(all_params, prefix='model'):
 
 
 def create_data_loader(image_shape, is_train, args):
-    image = fluid.data(
+    image = static.data(
         name="image", shape=[None] + image_shape, dtype="float32")
-    label = fluid.data(name="label", shape=[None, 1], dtype="int64")
-    data_loader = fluid.io.DataLoader.from_generator(
+    label = static.data(name="label", shape=[None, 1], dtype="int64")
+    data_loader = paddle.io.DataLoader.from_generator(
         feed_list=[image, label],
         capacity=64,
         use_double_buffer=True,
@@ -60,9 +63,9 @@ def create_data_loader(image_shape, is_train, args):
     drop_path_prob = ''
     drop_path_mask = ''
     if is_train:
-        drop_path_prob = fluid.data(
+        drop_path_prob = static.data(
             name="drop_path_prob", shape=[args.batch_size, 1], dtype="float32")
-        drop_path_mask = fluid.data(
+        drop_path_mask = static.data(
             name="drop_path_mask",
             shape=[args.batch_size, 20, 4, 2],
             dtype="float32")
@@ -72,36 +75,33 @@ def create_data_loader(image_shape, is_train, args):
 
 def build_program(main_program, startup_program, image_shape, archs, args,
                   is_train):
-    with fluid.program_guard(main_program, startup_program):
+    with static.program_guard(main_program, startup_program):
         data_loader, data, label, drop_path_prob, drop_path_mask = create_data_loader(
             image_shape, is_train, args)
         logits, logits_aux = archs(data, drop_path_prob, drop_path_mask,
                                    is_train, 10)
-        top1 = fluid.layers.accuracy(input=logits, label=label, k=1)
-        top5 = fluid.layers.accuracy(input=logits, label=label, k=5)
-        loss = fluid.layers.reduce_mean(
-            fluid.layers.softmax_with_cross_entropy(logits, label))
+        top1 = paddle.metric.accuracy(input=logits, label=label, k=1)
+        top5 = paddle.metric.accuracy(input=logits, label=label, k=5)
+        loss = paddle.mean(F.softmax_with_cross_entropy(logits, label))
 
         if is_train:
             if auxiliary:
-                loss_aux = fluid.layers.reduce_mean(
-                    fluid.layers.softmax_with_cross_entropy(logits_aux, label))
+                loss_aux = paddle.mean(
+                    F.softmax_with_cross_entropy(logits_aux, label))
                 loss = loss + auxiliary_weight * loss_aux
             step_per_epoch = int(trainset_num / args.batch_size)
-            learning_rate = fluid.layers.cosine_decay(lr, step_per_epoch,
-                                                      args.retain_epoch)
-            fluid.clip.set_gradient_clip(
-                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
-            optimizer = fluid.optimizer.MomentumOptimizer(
+            learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
+                lr, T_max=step_per_epoch * args.retain_epoch)
+            optimizer = paddle.optimizer.Momentum(
                 learning_rate,
                 momentum,
-                regularization=fluid.regularizer.L2DecayRegularizer(
-                    weight_decay))
+                weight_decay=paddle.regularizer.L2Decay(weight_decay),
+                grad_clip=nn.ClipGradByGlobalNorm(clip_norm=5.0))
             optimizer.minimize(loss)
-            outs = [loss, top1, top5, learning_rate]
+            outs = [loss, top1, top5]
         else:
             outs = [loss, top1, top5]
-    return outs, data_loader
+    return outs, (data, label), data_loader
 
 
 def train(main_prog, exe, epoch_id, train_loader, fetch_list, args):
@@ -129,16 +129,16 @@ def train(main_prog, exe, epoch_id, train_loader, fetch_list, args):
                 })
         else:
             feed = data
-        loss_v, top1_v, top5_v, lr = exe.run(
+        loss_v, top1_v, top5_v = exe.run(
             main_prog, feed=feed, fetch_list=[v.name for v in fetch_list])
         loss.update(loss_v, args.batch_size)
         top1.update(top1_v, args.batch_size)
         top5.update(top5_v, args.batch_size)
         if step_id % 10 == 0:
             _logger.info(
-                "Train Epoch {}, Step {}, Lr {:.8f}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
-                format(epoch_id, step_id, lr[0], loss.avg[0], top1.avg[0],
-                       top5.avg[0]))
+                "Train Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
+                format(epoch_id, step_id, loss.avg[0], top1.avg[0], top5.avg[
+                    0]))
     return top1.avg[0]
 
 
@@ -161,6 +161,8 @@ def valid(main_prog, exe, epoch_id, valid_loader, fetch_list, args):
 
 
 def search(config, args, image_size, is_server=True):
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
+    place = places[0]
     if is_server:
         ### start a server and a client
         sa_nas = SANAS(
@@ -180,10 +182,10 @@ def search(config, args, image_size, is_server=True):
     for step in range(args.search_steps):
         archs = sa_nas.next_archs()[0]
 
-        train_program = fluid.Program()
-        test_program = fluid.Program()
-        startup_program = fluid.Program()
-        train_fetch_list, train_loader = build_program(
+        train_program = static.Program()
+        test_program = static.Program()
+        startup_program = static.Program()
+        train_fetch_list, _, train_loader = build_program(
             train_program,
             startup_program,
             image_shape,
@@ -198,7 +200,7 @@ def search(config, args, image_size, is_server=True):
         if current_params > float(3.77):
             continue
 
-        test_fetch_list, test_loader = build_program(
+        test_fetch_list, _, test_loader = build_program(
             test_program,
             startup_program,
             image_shape,
@@ -207,8 +209,7 @@ def search(config, args, image_size, is_server=True):
             is_train=False)
         test_program = test_program.clone(for_test=True)
 
-        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
-        exe = fluid.Executor(place)
+        exe = static.Executor(place)
         exe.run(startup_program)
 
         train_reader = reader.train_valid(
@@ -219,8 +220,8 @@ def search(config, args, image_size, is_server=True):
         train_loader.set_batch_generator(train_reader, places=place)
         test_loader.set_batch_generator(test_reader, places=place)
 
-        build_strategy = fluid.BuildStrategy()
-        train_compiled_program = fluid.CompiledProgram(
+        build_strategy = static.BuildStrategy()
+        train_compiled_program = static.CompiledProgram(
             train_program).with_data_parallel(
                 loss_name=train_fetch_list[0].name,
                 build_strategy=build_strategy)
@@ -241,52 +242,40 @@ def search(config, args, image_size, is_server=True):
 
 def final_test(config, args, image_size, token=None):
     assert token != None, "If you want to start a final experiment, you must input a token."
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
+    place = places[0]
     sa_nas = SANAS(
         config, server_addr=(args.server_address, args.port), is_server=True)
 
     image_shape = [3, image_size, image_size]
     archs = sa_nas.tokens2arch(token)[0]
 
-    train_program = fluid.Program()
-    test_program = fluid.Program()
-    startup_program = fluid.Program()
-    train_fetch_list, train_loader = build_program(
-        train_program,
-        startup_program,
-        image_shape,
-        archs,
-        args,
-        is_train=True)
+    train_program = static.Program()
+    test_program = static.Program()
+    startup_program = static.Program()
+    train_fetch_list, (data, label), train_loader = build_program(
+        train_program, startup_program, image_shape, archs, args, is_train=True)
 
     current_params = count_parameters_in_MB(
         train_program.global_block().all_parameters(), 'cifar10')
     _logger.info('current_params: {}M'.format(current_params))
-    test_fetch_list, test_loader = build_program(
-        test_program,
-        startup_program,
-        image_shape,
-        archs,
-        args,
-        is_train=False)
+    test_fetch_list, _, test_loader = build_program(
+        test_program, startup_program, image_shape, archs, args, is_train=False)
     test_program = test_program.clone(for_test=True)
 
-    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
-    exe = fluid.Executor(place)
+    exe = static.Executor(place)
     exe.run(startup_program)
 
     train_reader = reader.train_valid(
-        batch_size=args.batch_size, is_train=True, is_shuffle=True, args=args)
+        batch_size=args.batch_size, is_train=True, is_shuffle=True)
     test_reader = reader.train_valid(
-        batch_size=args.batch_size,
-        is_train=False,
-        is_shuffle=False,
-        args=args)
+        batch_size=args.batch_size, is_train=False, is_shuffle=False)
 
     train_loader.set_batch_generator(train_reader, places=place)
     test_loader.set_batch_generator(test_reader, places=place)
 
-    build_strategy = fluid.BuildStrategy()
-    train_compiled_program = fluid.CompiledProgram(
+    build_strategy = static.BuildStrategy()
+    train_compiled_program = static.CompiledProgram(
         train_program).with_data_parallel(
             loss_name=train_fetch_list[0].name, build_strategy=build_strategy)
 
@@ -305,11 +294,12 @@ def final_test(config, args, image_size, token=None):
         output_dir = os.path.join('darts_output', str(epoch_id))
         if not os.path.exists(output_dir):
             os.makedirs(output_dir)
-        fluid.io.save_persistables(exe, output_dir, main_program=train_program)
+        static.save_inference_model(output_dir, [data], test_fetch_list, exe)
 
 
 if __name__ == '__main__':
 
+    paddle.enable_static()
     parser = argparse.ArgumentParser(
         description='SA NAS MobileNetV2 cifar10 argparase')
     parser.add_argument(
diff --git a/demo/optimizer.py b/demo/optimizer.py
index 6b8962749b6f5000fadc67356dbb302b57d4c3e7..bd1d3bb40b98d030d54f9d387ad5dd2d90bf1c74 100644
--- a/demo/optimizer.py
+++ b/demo/optimizer.py
@@ -18,9 +18,7 @@ from __future__ import print_function
 
 import math
 
-import paddle.fluid as fluid
-import paddle.fluid.layers.ops as ops
-from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
+import os
+
+import paddle
 
 lr_strategy = 'cosine_decay'
 l2_decay = 1e-4
@@ -33,111 +31,6 @@ decay_rate = 0.97
 total_images = 1281167
 
 
-def cosine_decay(learning_rate, step_each_epoch, epochs=120):
-    """Applies cosine decay to the learning rate.
-    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
-    """
-    global_step = _decay_step_counter()
-
-    epoch = ops.floor(global_step / step_each_epoch)
-    decayed_lr = learning_rate * \
-                 (ops.cos(epoch * (math.pi / epochs)) + 1)/2
-    return decayed_lr
-
-
-def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
-    """Applies cosine decay to the learning rate.
-    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
-    decrease lr for every mini-batch and start with warmup.
-    """
-    global_step = _decay_step_counter()
-    lr = fluid.layers.tensor.create_global_var(
-        shape=[1],
-        value=0.0,
-        dtype='float32',
-        persistable=True,
-        name="learning_rate")
-
-    warmup_epoch = fluid.layers.fill_constant(
-        shape=[1], dtype='float32', value=float(5), force_cpu=True)
-
-    epoch = ops.floor(global_step / step_each_epoch)
-    with fluid.layers.control_flow.Switch() as switch:
-        with switch.case(epoch < warmup_epoch):
-            decayed_lr = learning_rate * (global_step /
-                                          (step_each_epoch * warmup_epoch))
-            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
-        with switch.default():
-            decayed_lr = learning_rate * \
-                (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
-            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
-    return lr
-
-
-def exponential_decay_with_warmup(learning_rate,
-                                  step_each_epoch,
-                                  decay_epochs,
-                                  decay_rate=0.97,
-                                  warm_up_epoch=5.0):
-    """Applies exponential decay to the learning rate.
-    """
-    global_step = _decay_step_counter()
-    lr = fluid.layers.tensor.create_global_var(
-        shape=[1],
-        value=0.0,
-        dtype='float32',
-        persistable=True,
-        name="learning_rate")
-
-    warmup_epoch = fluid.layers.fill_constant(
-        shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
-
-    epoch = ops.floor(global_step / step_each_epoch)
-    with fluid.layers.control_flow.Switch() as switch:
-        with switch.case(epoch < warmup_epoch):
-            decayed_lr = learning_rate * (global_step /
-                                          (step_each_epoch * warmup_epoch))
-            fluid.layers.assign(input=decayed_lr, output=lr)
-        with switch.default():
-            div_res = (global_step - warmup_epoch * step_each_epoch
-                       ) / decay_epochs
-            div_res = ops.floor(div_res)
-            decayed_lr = learning_rate * (decay_rate**div_res)
-            fluid.layers.assign(input=decayed_lr, output=lr)
-
-    return lr
-
-
-def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
-    """ Applies linear learning rate warmup for distributed training
-        Argument learning_rate can be float or a Variable
-        lr = lr + (warmup_rate * step / warmup_steps)
-    """
-    assert (isinstance(end_lr, float))
-    assert (isinstance(start_lr, float))
-    linear_step = end_lr - start_lr
-    with fluid.default_main_program()._lr_schedule_guard():
-        lr = fluid.layers.tensor.create_global_var(
-            shape=[1],
-            value=0.0,
-            dtype='float32',
-            persistable=True,
-            name="learning_rate_warmup")
-
-        global_step = fluid.layers.learning_rate_scheduler._decay_step_counter(
-        )
-
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(global_step < warmup_steps):
-                decayed_lr = start_lr + linear_step * (global_step /
-                                                       warmup_steps)
-                fluid.layers.tensor.assign(decayed_lr, lr)
-            with switch.default():
-                fluid.layers.tensor.assign(learning_rate, lr)
-
-        return lr
-
-
 class Optimizer(object):
     """A class used to represent several optimizer methods
 
@@ -167,23 +60,13 @@ class Optimizer(object):
         self.decay_epochs = decay_epochs
         self.decay_rate = decay_rate
         self.total_images = total_images
+        if args.use_gpu:
+            devices_num = len(paddle.static.cuda_places())
+        else:
+            devices_num = int(os.environ.get('CPU_NUM', 1))
 
-        self.step = int(math.ceil(float(self.total_images) / self.batch_size))
-
-    def piecewise_decay(self):
-        """piecewise decay with Momentum optimizer
-
-            Returns:
-            a piecewise_decay optimizer
-        """
-        bd = [self.step * e for e in self.step_epochs]
-        lr = [self.lr * (0.1**i) for i in range(len(bd) + 1)]
-        learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
-        optimizer = fluid.optimizer.Momentum(
-            learning_rate=learning_rate,
-            momentum=self.momentum_rate,
-            regularization=fluid.regularizer.L2Decay(self.l2_decay))
-        return optimizer
+        self.step = int(
+            math.ceil(float(self.total_images) / self.batch_size) / devices_num)
 
     def cosine_decay(self):
         """cosine decay with Momentum optimizer
@@ -191,111 +74,25 @@ class Optimizer(object):
         Returns:
             a cosine_decay optimizer
         """
-
-        learning_rate = fluid.layers.cosine_decay(
-            learning_rate=self.lr,
-            step_each_epoch=self.step,
-            epochs=self.num_epochs)
-        optimizer = fluid.optimizer.Momentum(
-            learning_rate=learning_rate,
-            momentum=self.momentum_rate,
-            regularization=fluid.regularizer.L2Decay(self.l2_decay))
-        return optimizer
-
-    def cosine_decay_warmup(self):
-        """cosine decay with warmup
-
-        Returns:
-            a cosine_decay_with_warmup optimizer
-        """
-
-        learning_rate = cosine_decay_with_warmup(
-            learning_rate=self.lr,
-            step_each_epoch=self.step,
-            epochs=self.num_epochs)
-        optimizer = fluid.optimizer.Momentum(
-            learning_rate=learning_rate,
-            momentum=self.momentum_rate,
-            regularization=fluid.regularizer.L2Decay(self.l2_decay))
-        return optimizer
-
-    def exponential_decay_warmup(self):
-        """exponential decay with warmup
-
-        Returns:
-            a exponential_decay_with_warmup optimizer
-        """
-
-        learning_rate = exponential_decay_with_warmup(
+        learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
             learning_rate=self.lr,
-            step_each_epoch=self.step,
-            decay_epochs=self.step * self.decay_epochs,
-            decay_rate=self.decay_rate,
-            warm_up_epoch=self.warm_up_epochs)
-        optimizer = fluid.optimizer.RMSProp(
-            learning_rate=learning_rate,
-            regularization=fluid.regularizer.L2Decay(self.l2_decay),
-            momentum=self.momentum_rate,
-            rho=0.9,
-            epsilon=0.001)
-        return optimizer
-
-    def linear_decay(self):
-        """linear decay with Momentum optimizer
-
-        Returns:
-            a linear_decay optimizer
-        """
-
-        end_lr = 0
-        learning_rate = fluid.layers.polynomial_decay(
-            self.lr, self.step, end_lr, power=1)
-        optimizer = fluid.optimizer.Momentum(
+            T_max=self.step * self.num_epochs,
+            verbose=False)
+        optimizer = paddle.optimizer.Momentum(
             learning_rate=learning_rate,
             momentum=self.momentum_rate,
-            regularization=fluid.regularizer.L2Decay(self.l2_decay))
-
+            weight_decay=paddle.regularizer.L2Decay(self.l2_decay))
         return optimizer
 
-    def adam_decay(self):
-        """Adam optimizer
-
-        Returns: 
-            an adam_decay optimizer
-        """
-
-        return fluid.optimizer.Adam(learning_rate=self.lr)
-
-    def cosine_decay_RMSProp(self):
-        """cosine decay with RMSProp optimizer
-
-        Returns: 
-            an cosine_decay_RMSProp optimizer
-        """
-
-        learning_rate = fluid.layers.cosine_decay(
-            learning_rate=self.lr,
-            step_each_epoch=self.step,
-            epochs=self.num_epochs)
-        optimizer = fluid.optimizer.RMSProp(
+    def piecewise_decay(self):
+        """piecewise decay with Momentum optimizer
+
+        Returns:
+            a piecewise_decay optimizer
+        """
+        bd = [self.step * e for e in self.step_epochs]
+        lr = [self.lr * (0.1**i) for i in range(len(bd) + 1)]
+        learning_rate = paddle.optimizer.lr.PiecewiseDecay(
+            boundaries=bd, values=lr, verbose=False)
+        optimizer = paddle.optimizer.Momentum(
             learning_rate=learning_rate,
-            momentum=self.momentum_rate,
-            regularization=fluid.regularizer.L2Decay(self.l2_decay),
-            # Apply epsilon=1 on ImageNet dataset.
-            epsilon=1)
-        return optimizer
-
-    def default_decay(self):
-        """default decay
-
-        Returns:
-            default decay optimizer
-        """
-
-        optimizer = fluid.optimizer.Momentum(
-            learning_rate=self.lr,
-            momentum=self.momentum_rate,
-            regularization=fluid.regularizer.L2Decay(self.l2_decay))
+            momentum=self.momentum_rate,
+            weight_decay=paddle.regularizer.L2Decay(self.l2_decay))
         return optimizer