diff --git a/demo/auto_prune/README.md b/demo/auto_prune/README.md
index 9ada4cf4c848ef24e296aa3ebf4724c707354199..0f6b5bb80ce8182e3d10aad29e5c0ae12659c5de 100644
--- a/demo/auto_prune/README.md
+++ b/demo/auto_prune/README.md
@@ -1,5 +1,5 @@
 该示例介绍如何使用自动裁剪。
-该示例使用默认会自动下载并使用MNIST数据。支持以下模型：
+该示例需要使用ImageNet数据集以及预训练模型。支持以下模型：
 
 - MobileNetV1
 - MobileNetV2
@@ -15,7 +15,7 @@
 ## 2. 运行示例
 
 
-提供两种自动裁剪模式，直接以裁剪目标进行一次自动裁剪，和多次迭代的方式进行裁剪。 
+提供两种自动裁剪模式，直接以裁剪目标进行一次自动裁剪，和多次迭代的方式进行裁剪。
 
 ###2.1一次裁剪
 
diff --git a/demo/auto_prune/train.py b/demo/auto_prune/train.py
index ff55a54597ce7e923764f9e6f8dff232d2d28658..f0b2a2b0b4cd7cd235af55268d1f2c44b11a8339 100644
--- a/demo/auto_prune/train.py
+++ b/demo/auto_prune/train.py
@@ -8,10 +8,10 @@ import math
 import time
 import numpy as np
 import paddle.fluid as fluid
+sys.path[0] = os.path.join(os.path.dirname(__file__), os.path.pardir)
 from paddleslim.prune import AutoPruner
 from paddleslim.common import get_logger
 from paddleslim.analysis import flops
-sys.path.append(sys.path[0] + "/../")
 import models
 from utility import add_arguments, print_arguments
 
@@ -23,7 +23,7 @@ add_arg = functools.partial(add_arguments, argparser=parser)
 add_arg('batch_size',       int,  64 * 4,                 "Minibatch size.")
 add_arg('use_gpu',          bool, True,                "Whether to use GPU or not.")
 add_arg('model',            str,  "MobileNet",                "The target model.")
-add_arg('pretrained_model', str,  "../pretrained_model/MobileNetV1_pretained",                "Whether to use pretrained model.")
+add_arg('pretrained_model', str,  "../pretrained_model/MobileNetV1_pretrained",                "Whether to use pretrained model.")
 add_arg('lr',               float,  0.1,               "The learning rate used to fine-tune pruned model.")
 add_arg('lr_strategy',      str,  "piecewise_decay",   "The learning rate decay strategy.")
 add_arg('l2_decay',         float,  3e-5,               "The l2_decay parameter.")
@@ -32,7 +32,7 @@ add_arg('num_epochs',       int,  120,               "The number of total epochs
 add_arg('total_images',     int,  1281167,               "The number of total training images.")
 parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
 add_arg('config_file',      str, None,                 "The config file for compression with yaml format.")
-add_arg('data',             str, "mnist",                 "Which data to use. 'mnist' or 'imagenet'")
+add_arg('data',             str, "imagenet",                 "Which data to use. 'mnist' or 'imagenet'")
 add_arg('log_period',       int, 10,                 "Log period in batches.")
 add_arg('test_period',      int, 10,                 "Test period in epoches.")
 # yapf: enable
@@ -90,8 +90,8 @@ def compress(args):
         raise ValueError("{} is not supported.".format(args.data))
 
     image_shape = [int(m) for m in image_shape.split(",")]
-    assert args.model in model_list, "{} is not in lists: {}".format(
-        args.model, model_list)
+    assert args.model in model_list, "{} is not in lists: {}".format(args.model,
+                                                                     model_list)
     image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
     # model definition
@@ -111,8 +111,7 @@ def compress(args):
     if args.pretrained_model:
 
         def if_exist(var):
-            return os.path.exists(
-                os.path.join(args.pretrained_model, var.name))
+            return os.path.exists(os.path.join(args.pretrained_model, var.name))
 
         fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
 
@@ -145,10 +144,9 @@ def compress(args):
             acc_top5_ns.append(np.mean(acc_top5_n))
             batch_id += 1
 
-        _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".
-                     format(epoch,
-                            np.mean(np.array(acc_top1_ns)),
-                            np.mean(np.array(acc_top5_ns))))
+        _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
+            epoch,
+            np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
         return np.mean(np.array(acc_top1_ns))
 
     def train(epoch, program):
diff --git a/demo/auto_prune/train_finetune.py b/demo/auto_prune/train_finetune.py
index 0dcb56bfe7988475cc7c63ebde257897456bfb22..bc56ffa0568520634aab5c294a03c05813f13a54 100644
--- a/demo/auto_prune/train_finetune.py
+++ b/demo/auto_prune/train_finetune.py
@@ -21,7 +21,7 @@ add_arg('batch_size',       int,  64 * 4,                 "Minibatch size.")
 add_arg('use_gpu',          bool, True,                "Whether to use GPU or not.")
 add_arg('model',            str,  "MobileNet",                "The target model.")
 add_arg('model_save_dir',            str,  "./",                "checkpoint  model.")
-add_arg('pretrained_model', str,  "../pretrained_model/MobileNetV1_pretained",                "Whether to use pretrained model.")
+add_arg('pretrained_model', str,  "../pretrained_model/MobileNetV1_pretrained",                "Whether to use pretrained model.")
 add_arg('lr',               float,  0.01,               "The learning rate used to fine-tune pruned model.")
 add_arg('lr_strategy',      str,  "piecewise_decay",   "The learning rate decay strategy.")
 add_arg('l2_decay',         float,  3e-5,               "The l2_decay parameter.")
@@ -81,8 +81,8 @@ def compress(args):
     class_dim = 1000
     image_shape = "3,224,224"
     image_shape = [int(m) for m in image_shape.split(",")]
-    assert args.model in model_list, "{} is not in lists: {}".format(
-        args.model, model_list)
+    assert args.model in model_list, "{} is not in lists: {}".format(args.model,
+                                                                     model_list)
     image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
     # model definition
@@ -158,8 +158,7 @@ def compress(args):
                 train_program,
                 feed=train_feeder.feed(data),
                 fetch_list=[
-                    avg_cost.name, acc_top1.name, acc_top5.name,
-                    "learning_rate"
+                    avg_cost.name, acc_top1.name, acc_top5.name, "learning_rate"
                 ])
             end_time = time.time()
             loss_n = np.mean(loss_n)
@@ -177,8 +176,7 @@ def compress(args):
         #if "_weights" in  param.name and "conv1_weights" not in param.name:
         if "_sep_weights" in param.name:
             params.append(param.name)
-    print("fops before pruning: {}".format(
-        flops(fluid.default_main_program())))
+    print("fops before pruning: {}".format(flops(fluid.default_main_program())))
     pruned_program_iter = fluid.default_main_program()
     pruned_val_program_iter = val_program
     for ratios in ratiolist:
diff --git a/demo/auto_prune/train_iterator.py b/demo/auto_prune/train_iterator.py
index 8b7ca07f768e661476a5943dbc623291a37f939a..be80d892e0911e1ac592a7bfb8ab1f995b4f87f2 100644
--- a/demo/auto_prune/train_iterator.py
+++ b/demo/auto_prune/train_iterator.py
@@ -24,7 +24,7 @@ add_arg = functools.partial(add_arguments, argparser=parser)
 add_arg('batch_size',       int,  64 * 4,                 "Minibatch size.")
 add_arg('use_gpu',          bool, True,                "Whether to use GPU or not.")
 add_arg('model',            str,  "MobileNet",                "The target model.")
-add_arg('pretrained_model', str,  "../pretrained_model/MobileNetV1_pretained",                "Whether to use pretrained model.")
+add_arg('pretrained_model', str,  "../pretrained_model/MobileNetV1_pretrained",                "Whether to use pretrained model.")
 add_arg('model_save_dir',   str,  "./",                "checkpoint  model.")
 add_arg('lr',               float,  0.1,               "The learning rate used to fine-tune pruned model.")
 add_arg('lr_strategy',      str,  "piecewise_decay",   "The learning rate decay strategy.")
@@ -34,7 +34,7 @@ add_arg('num_epochs',       int,  120,               "The number of total epochs
 add_arg('total_images',     int,  1281167,               "The number of total training images.")
 parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
 add_arg('config_file',      str, None,                 "The config file for compression with yaml format.")
-add_arg('data',             str, "mnist",                 "Which data to use. 'mnist' or 'imagenet'")
+add_arg('data',             str, "imagenet",                 "Which data to use. 'mnist' or 'imagenet'")
 add_arg('log_period',       int, 10,                 "Log period in batches.")
 add_arg('test_period',      int, 10,                 "Test period in epoches.")
 # yapf: enable
@@ -96,8 +96,8 @@ def compress(args):
         raise ValueError("{} is not supported.".format(args.data))
 
     image_shape = [int(m) for m in image_shape.split(",")]
-    assert args.model in model_list, "{} is not in lists: {}".format(
-        args.model, model_list)
+    assert args.model in model_list, "{} is not in lists: {}".format(args.model,
+                                                                     model_list)
     image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
     # model definition
@@ -117,8 +117,7 @@ def compress(args):
     if args.pretrained_model:
 
         def if_exist(var):
-            return os.path.exists(
-                os.path.join(args.pretrained_model, var.name))
+            return os.path.exists(os.path.join(args.pretrained_model, var.name))
 
 #        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
 
@@ -151,10 +150,9 @@ def compress(args):
             acc_top5_ns.append(np.mean(acc_top5_n))
             batch_id += 1
 
-        _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".
-                     format(epoch,
-                            np.mean(np.array(acc_top1_ns)),
-                            np.mean(np.array(acc_top5_ns))))
+        _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
+            epoch,
+            np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
         return np.mean(np.array(acc_top1_ns))
 
     def train(epoch, program):
diff --git a/paddleslim/prune/auto_pruner.py b/paddleslim/prune/auto_pruner.py
index 54ae8f3aab6c6047677661a66e0ddd7fd0d3d3e9..6ace8546567fe7cce7ac8ec971d6a81d59546109 100644
--- a/paddleslim/prune/auto_pruner.py
+++ b/paddleslim/prune/auto_pruner.py
@@ -41,12 +41,12 @@ class AutoPruner(object):
         params(list<str>): The names of parameters to be pruned.
         init_ratios(list<float>|float): Init ratios used to pruned parameters in `params`.
             List means ratios used for pruning each parameter in `params`.
-            The length of `init_ratios` should be equal to length of params when `init_ratios` is a list. 
+            The length of `init_ratios` should be equal to length of params when `init_ratios` is a list.
             If it is a scalar, all the parameters in `params` will be pruned by uniform ratio.
             None means get a group of init ratios by `pruned_flops` of `pruned_latency`. Default: None.
         pruned_flops(float): The percent of FLOPS to be pruned. Default: None.
         pruned_latency(float): The percent of latency to be pruned. Default: None.
-        server_addr(tuple): A tuple of server ip and server port for controller server. 
+        server_addr(tuple): A tuple of server ip and server port for controller server.
         init_temperature(float): The init temperature used in simulated annealing search strategy.
         reduce_rate(float): The decay rate used in simulated annealing search strategy.
         max_try_times(int): The max number of trying to generate legal tokens.
@@ -191,14 +191,14 @@ class AutoPruner(object):
             paddle.fluid.Program: The pruned program.
         """
         self._current_ratios = self._next_ratios()
-        pruned_program, _, _ = self._pruner.prune(
+        pruned_program, self._param_backup, _ = self._pruner.prune(
             program,
             self._scope,
             self._params,
             self._current_ratios,
             place=self._place,
             only_graph=False,
-            param_backup=self._param_backup)
+            param_backup=True)
         pruned_val_program = None
         if eval_program is not None:
             pruned_val_program, _, _ = self._pruner.prune(