Remove slim from paddle framework (#25666)

* Remove slim from paddle framework test=develop Co-authored-by: N wanghaoshuang <wanghaoshuang@baidu.com>

Remove slim from paddle framework (#25666)
* Remove slim from paddle framework test=develop Co-authored-by: N wanghaoshuang <wanghaoshuang@baidu.com>
2131559d · Bai Yifan · GitHub · bca30316 · 2131559d · 2131559d
56 changed files
--- a/python/paddle/fluid/contrib/__init__.py
+++ b/python/paddle/fluid/contrib/__init__.py
@@ -25,7 +25,6 @@ from .quantize import *
 from . import reader
 from .reader import *
 from . import slim
-from .slim import *
 from . import utils
 from .utils import *
 from . import extend_optimizer
@@ -43,7 +42,6 @@ __all__ += memory_usage_calc.__all__
 __all__ += op_frequence.__all__
 __all__ += quantize.__all__
 __all__ += reader.__all__
-__all__ += slim.__all__
 __all__ += utils.__all__
 __all__ += extend_optimizer.__all__
 __all__ += ['mixed_precision']

--- a/python/paddle/fluid/contrib/slim/__init__.py
+++ b/python/paddle/fluid/contrib/slim/__init__.py
@@ -11,6 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from .core import *
-__all__ = ['Compressor', ]
--- a/python/paddle/fluid/contrib/slim/core/__init__.py
+++ b/python/paddle/fluid/contrib/slim/core/__init__.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import config
-from .config import *
-from . import compressor
-from .compressor import *
-from . import strategy
-from .strategy import *
-
-__all__ = config.__all__ + compressor.__all__ + strategy.__all__
--- a/python/paddle/fluid/contrib/slim/core/compressor.py
+++ b/python/paddle/fluid/contrib/slim/core/compressor.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ....core import CPUPlace, EOFException
-from .... import compiler
-from ....framework import Variable
-from .... import io
-from .... import profiler
-from .... import scope_guard
-from ....data_feeder import DataFeeder
-from ....log_helper import get_logger
-from ....reader import DataLoaderBase
-from ..graph import *
-from .config import ConfigFactory
-import numpy as np
-from collections import Iterable
-import time
-import os
-import logging
-import sys
-import pickle
-import functools
-import traceback
-
-__all__ = ['Context', 'Compressor']
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-def cached_reader(reader, sampled_rate, cache_path, cached_id):
-    """
-    Sample partial data from reader and cache them into local file system.
-    Args:
-        reader: Iterative data source.
-        sampled_rate(float): The sampled rate used to sample partial data for evaluation. None means using all data in eval_reader. default: None.
-        cache_path(str): The path to cache the sampled data.
-        cached_id(int): The id of dataset sampled. Evaluations with same cached_id use the same sampled dataset. default: 0.
-    """
-    np.random.seed(cached_id)
-    cache_path = os.path.join(cache_path, str(cached_id))
-    _logger.debug('read data from: {}'.format(cache_path))
-
-    def s_reader():
-        if os.path.isdir(cache_path):
-            for file_name in open(os.path.join(cache_path, "list")):
-                yield np.load(
-                    os.path.join(cache_path, file_name.strip()),
-                    allow_pickle=True)
-        else:
-            os.makedirs(cache_path)
-            list_file = open(os.path.join(cache_path, "list"), 'w')
-            batch = 0
-            dtype = None
-            for data in reader():
-                if batch == 0 or (np.random.uniform() < sampled_rate):
-                    np.save(
-                        os.path.join(cache_path, 'batch' + str(batch)), data)
-                    list_file.write('batch' + str(batch) + '.npy\n')
-                    batch += 1
-                    yield data
-
-    return s_reader
-
-
-class Context(object):
-    """
-    The context in the process of compression.
-    """
-
-    def __init__(self,
-                 place,
-                 scope,
-                 train_graph=None,
-                 train_reader=None,
-                 eval_graph=None,
-                 eval_reader=None,
-                 teacher_graphs=None,
-                 train_optimizer=None,
-                 distiller_optimizer=None,
-                 search_space=None):
-        """
-        Args:
-            place: The device place where the compression job running.
-            scope: The scope used in compression job.
-            train_graph: The graph with loss as output node.
-            eval_graph: The graph used for evaluation.
-            eval_reader: The data reader used for evaluation.
-            teacher_graphs: The teacher graphs used in distillation strategies.
-            train_optimizer: The optimizer used to append backward ops and
-                             optimization ops into train_graph.
-            distiller_optimizer: The optimizer used by distillation strategies.
-        """
-        # The total number of epoches to be trained.
-        self.epoch = 0
-        # Current epoch
-        self.epoch_id = 0
-        # Current batch
-        self.batch_id = 0
-
-        self.k_v = {}
-
-        self.place = place
-        self.scope = scope
-        self.train_graph = train_graph
-        self.train_reader = train_reader
-        self.eval_graph = eval_graph
-        self.eval_reader = eval_reader
-        self.executor = None
-        self.teacher_graphs = teacher_graphs
-        self.train_optimizer = train_optimizer
-        self.distiller_optimizer = distiller_optimizer
-        self.optimize_graph = None
-        self.cache_path = './eval_cache'
-        self.eval_results = {}
-
-        self.skip_training = False
-        self.search_space = search_space
-
-    def to_file(self, file_name):
-        """
-        Save the context into file.
-        """
-        data = {}
-        data['epoch_id'] = self.epoch_id
-        data['eval_results'] = self.eval_results
-        with open(file_name, 'wb') as context_file:
-            pickle.dump(data, context_file)
-
-    def from_file(self, file_name):
-        """
-        Load the context from file.
-        """
-        with open(file_name, 'rb') as context_file:
-            if sys.version_info < (3, 0):
-                data = pickle.load(context_file)
-            else:
-                data = pickle.load(context_file, encoding='bytes')
-            self.epoch_id = data['epoch_id']
-            self.eval_results = data['eval_results']
-
-    def eval_converged(self, metric_name, delta=0.001):
-        """
-        Check whether the training has been converged.
-        Args:
-            metric_name(str): The metric used to check convergence.
-            delta(float): '(metric[k] - metric[k-1] / metric[k-1]) < delta'
-                          means that the training has been converged.
-        Returns:
-            bool: True means the training has been converged.
-        """
-        # TODO(wanghaoshuang@baidu.com): enhence this method.
-        if (metric_name not in self.eval_results
-            ) or len(self.eval_results[metric_name]) < 2:
-            return False
-        results = self.eval_results[metric_name][-2:]
-        _logger.info('Latest evaluations: {}'.format(results))
-        return abs(results[1] - results[0]) / results[0] < delta
-
-    def run_eval_graph(self, sampled_rate=None, cached_id=0):
-        """
-        Evaluate the current mode in context.
-        Args:
-            sampled_rate(float): The sampled rate used to sample partial data
-            for evaluation. None means using all data in eval_reader. default: None.
-            cached_id(int): The id of dataset sampled. Evaluations with same
-                            cached_id use the same sampled dataset. default: 0.
-        """
-        _logger.info('Running evaluation')
-        assert self.eval_graph is not None
-        assert self.eval_reader is not None
-        eval_graph = self.eval_graph.clone(for_test=True)
-
-        executor = SlimGraphExecutor(self.place)
-        results = []
-        batch_id = 0
-        s_time = time.time()
-        reader = self.eval_reader
-        if sampled_rate:
-            assert (not isinstance(reader, Variable))
-            assert (sampled_rate > 0)
-            assert (self.cache_path is not None)
-            _logger.info('sampled_rate: {}; cached_id: {}'.format(sampled_rate,
-                                                                  cached_id))
-            reader = cached_reader(reader, sampled_rate, self.cache_path,
-                                   cached_id)
-
-        if isinstance(reader, Variable) or (
-                isinstance(reader, DataLoaderBase) and (not reader.iterable)):
-            reader.start()
-            try:
-                while True:
-                    result = executor.run(eval_graph, self.scope)
-                    result = [np.mean(r) for r in result]
-                    results.append(result)
-                    if batch_id % 20 == 0:
-                        _logger.info("batch-{}; {}={}".format(
-                            batch_id, eval_graph.out_nodes.keys(), result))
-                    batch_id += 1
-            except EOFException:
-                reader.reset()
-        else:
-            for data in reader():
-                result = executor.run(eval_graph, self.scope, data=data)
-                result = [np.mean(r) for r in result]
-                results.append(result)
-                if batch_id % 20 == 0:
-                    _logger.info("batch-{}; {}={}".format(
-                        batch_id, eval_graph.out_nodes.keys(), result))
-                batch_id += 1
-
-        result = list(np.mean(np.array(results), axis=0))
-        _logger.info("Final eval result: {}={}".format(
-            eval_graph.out_nodes.keys(), result))
-        if not isinstance(result, Iterable):
-            result = [result]
-        _logger.info('Finish evaluation')
-        return result, eval_graph.out_nodes.keys()
-
-    def put(self, key, value):
-        self.k_v[key] = value
-
-    def get(self, key):
-        return self.k_v.get(key)
-
-
-class Compressor(object):
-    """
-    The pass used to compress model.
-    """
-
-    def __init__(self,
-                 place,
-                 scope,
-                 train_program,
-                 train_reader=None,
-                 train_feed_list=None,
-                 train_fetch_list=None,
-                 eval_program=None,
-                 eval_reader=None,
-                 eval_feed_list=None,
-                 eval_fetch_list=None,
-                 eval_func=None,
-                 save_eval_model=True,
-                 prune_infer_model=None,
-                 teacher_programs=[],
-                 checkpoint_path=None,
-                 train_optimizer=None,
-                 distiller_optimizer=None,
-                 search_space=None,
-                 log_period=20):
-        """
-        Args:
-            place(fluid.Place): The device place where the compression job running.
-            scope(fluid.core.Scope): The scope used to run graph.
-            train_program(Program): The main program to be compressed. It must have loss op.
-            train_reader: The data reader used for training.
-            train_feed_list(dict): A dict to indicate the input variable of the training program.
-                                   The key is user-defined and human-readable name.
-                                   The value is the name of Variable.
-            train_fetch_list(dict): A dict to indicate the output variable of the training program.
-                                   The key is user-defined and human-readable name.
-                                   The value is the name of Variable.
-            eval_program(Program): The program used for evaluation.
-            eval_reader: The data reader used for evaluation. It can be None if eval_func is not None.
-            eval_feed_list(dict): A dict to indicate the input variable of the evaluation program.
-                                   The key is user-defined and human-readable name.
-                                   The value is the name of Variable.
-                                   It can be None if eval_func is not None.
-            eval_fetch_list(dict): A dict to indicate the output variable of the evaluation program.
-                                   The key is user-defined and human-readable name.
-                                   The value is the name of Variable.
-            eval_func(dict|function): Callback functions used to evaluate the compressed model.
-                                   The eval_func is a dict, the key is user-defined name and the value is 
-                                   a callback function. And the score returned from callback functions 
-                                   can be referenced in config file by the key of eval_func.
-                                   The args of callback function are compressed eval_program and scope which
-                                   store the compressed parameters.
-                                   Default: None.
-            save_eval_model(bool): Whether to save eval model when saving checkpoints. Default: True.
-            prune_infer_model(tuple|list): If prune_infer_model is not None, compressor will prune
-                                   eval program into inference program according to inputs and outputs
-                                   defined in prune_infer_model. prune_infer_model[0] is a list of input
-                                   variables' names and prune_infer_model[1] is a list of output variables'
-                                   names. If prune_infer_model is None, it will not save inference model.
-                                   Default: None.
-            teacher_programs: The teacher graphs used in distillation strategies.
-            train_optimizer: The optimizer used to append backward ops and
-                             optimization ops into train_graph.
-            distiller_optimizer: The optimizer used by distillation strategies. In distillation strategy,
-                                 this optimizer is used to minimize the combined loss of student-net and
-                                 teacher-net while train_optimizer is used to minimize loss of
-                                 student-net in fine-tune stage. 
-            search_space(slim.nas.SearchSpace): The instance that define the searching space. It must inherit
-                              slim.nas.SearchSpace class and overwrite the abstract methods.
-            log_period(int): The period of print log of training.
-
-        """
-        assert train_feed_list is None or isinstance(
-            train_feed_list, list
-        ), "train_feed_list should be a list of tuple, such as [('image', image.name), ('label', gt.name)]"
-        assert eval_feed_list is None or isinstance(
-            eval_feed_list, list
-        ), "eval_feed_list should be a list of tuple, such as [('image', image.name), ('label', gt.name)]"
-        self.strategies = []
-        self.epoch = 0
-        self.place = CPUPlace() if place is None else place
-        self.scope = scope
-        self.train_graph = GraphWrapper(
-            train_program, in_nodes=train_feed_list, out_nodes=train_fetch_list)
-        self.eval_graph = GraphWrapper(
-            eval_program, in_nodes=eval_feed_list, out_nodes=eval_fetch_list)
-        self.train_reader = train_reader
-        self.eval_reader = eval_reader
-        self.eval_func = eval_func
-        self.save_eval_model = save_eval_model
-        self.prune_infer_model = prune_infer_model
-
-        self.teacher_graphs = []
-        for teacher in teacher_programs:
-            self.teacher_graphs.append(GraphWrapper(teacher))
-
-        self.checkpoint = None
-        self.checkpoint_path = checkpoint_path
-        self.eval_epoch = 1
-
-        self.train_optimizer = train_optimizer
-        self.distiller_optimizer = distiller_optimizer
-        self.init_model = None
-
-        self.search_space = search_space
-        self.log_period = log_period
-        assert (log_period > 0)
-
-    def _add_strategy(self, strategy):
-        """
-        Add a strategy to current compress pass.
-        Args:
-            strategy: The strategy to be added into current compress pass.
-        """
-        self.strategies.append(strategy)
-        self.epoch = max(strategy.end_epoch, self.epoch)
-
-    def config(self, config_file):
-        """
-        Configure the compress pass from file with yaml format.
-        Args:
-            config_file(str): The config file in local file system.
-        """
-        factory = ConfigFactory(config_file)
-        self.epoch = factory.compressor['epoch']
-        for strategy in factory.compressor['strategies']:
-            self._add_strategy(strategy)
-        if 'checkpoint_path' in factory.compressor:
-            self.checkpoint_path = factory.compressor['checkpoint_path']
-
-        if 'init_model' in factory.compressor:
-            self.init_model = factory.compressor['init_model']
-
-        if 'eval_epoch' in factory.compressor:
-            self.eval_epoch = factory.compressor['eval_epoch']
-        assert (self.eval_epoch > 0)
-
-    def _init_model(self, context):
-        """
-        Load model that has been compressed. 
-        """
-        if self.init_model and os.path.exists(self.init_model):
-            exe = SlimGraphExecutor(context.place)
-            with scope_guard(context.scope):
-                context.train_graph.load_persistables(self.init_model, exe)
-            flops = context.eval_graph.flops()
-            conv_flops = context.eval_graph.flops(only_conv=True)
-            context.eval_graph.update_param_shape(context.scope)
-            context.eval_graph.update_groups_of_conv()
-            _logger.info("conv flops: -{}".format(1 - float(
-                context.eval_graph.flops(only_conv=True)) / conv_flops))
-            _logger.info("total flops: -{}".format(1 - float(
-                context.eval_graph.flops()) / flops))
-            context.train_graph.update_param_shape(context.scope)
-            context.train_graph.update_groups_of_conv()
-            context.train_graph.infer_shape()
-            _logger.info("Init model from: {}".format(self.init_model))
-
-    def _load_checkpoint(self, context):
-        """
-        Load checkpoints from file.
-        """
-        _logger.debug('_load_checkpoint')
-        strategies = self.strategies
-        if self.checkpoint_path:
-            if not os.path.exists(self.checkpoint_path):
-                _logger.warning("Checkpints path doesn't exist: [{}]".format(
-                    self.checkpoint_path))
-                return context, strategies
-            checkpoints = [
-                dir for dir in os.listdir(self.checkpoint_path)
-                if os.path.isdir(os.path.join(self.checkpoint_path, dir))
-            ]
-            _logger.debug('self.checkpoint_path: {}'.format(
-                self.checkpoint_path))
-            _logger.info('checkpoints: {}'.format(checkpoints))
-            if len(checkpoints) > 0:
-                latest = max([int(ck) for ck in checkpoints])
-                latest_ck_path = os.path.join(self.checkpoint_path, str(latest))
-
-                model_path = os.path.join(latest_ck_path, 'model')
-                context_path = os.path.join(latest_ck_path, 'context')
-                strategy_path = os.path.join(latest_ck_path, 'strategies')
-                if os.path.exists(context_path):
-                    context.from_file(context_path)
-                    context.epoch_id += 1
-                if os.path.exists(strategy_path):
-                    with open(strategy_path, 'rb') as strategy_file:
-                        if sys.version_info < (3, 0):
-                            strategies = pickle.load(strategy_file)
-                        else:
-                            strategies = pickle.load(
-                                strategy_file, encoding='bytes')
-                assert (len(self.strategies) == len(strategies))
-                for s, s1 in zip(self.strategies, strategies):
-                    s1.__dict__.update(s.__dict__)
-
-                for strategy in strategies:
-                    strategy.restore_from_checkpoint(context)
-
-                if os.path.exists(model_path):
-                    exe = SlimGraphExecutor(context.place)
-                    with scope_guard(context.scope):
-                        context.optimize_graph.load_persistables(model_path,
-                                                                 exe)
-                    _logger.info("Loaded params from: {}".format(model_path))
-        return context, strategies
-
-    def _save_checkpoint(self, context):
-        """
-        Save checkpoints to file.
-        """
-        if context.epoch_id % 1 == 0 and self.checkpoint_path:
-            checkpoint_path = os.path.join(self.checkpoint_path,
-                                           str(context.epoch_id))
-            model_path = os.path.join(checkpoint_path, 'model')
-            eval_model_path = os.path.join(checkpoint_path, 'eval_model')
-            context_path = os.path.join(checkpoint_path, 'context')
-            strategy_path = os.path.join(checkpoint_path, 'strategies')
-            if not os.path.isdir(model_path):
-                os.makedirs(model_path)
-            exe = SlimGraphExecutor(context.place)
-            with scope_guard(context.scope):
-                context.optimize_graph.save_persistables(model_path, exe)
-                if self.save_eval_model:
-                    context.eval_graph.save_model(eval_model_path, exe)
-                if self.prune_infer_model:
-                    context.eval_graph.save_infer_model(
-                        eval_model_path,
-                        exe,
-                        self.prune_infer_model,
-                        program_only=self.save_eval_model)
-
-            context.to_file(context_path)
-            with open(strategy_path, 'wb') as strategy_file:
-                pickle.dump(self.strategies, strategy_file)
-            _logger.info('Saved checkpoint to: {}'.format(checkpoint_path))
-
-    def _train_one_epoch(self, context):
-        """
-        Train one epoch.
-        """
-        if context.skip_training:
-            return
-        executor = SlimGraphExecutor(self.place)
-
-        if context.optimize_graph.compiled_graph is None:
-            build_strategy = compiler.BuildStrategy()
-            build_strategy.fuse_all_reduce_ops = False
-            context.optimize_graph.compiled_graph = compiler.CompiledProgram(
-                context.optimize_graph.program).with_data_parallel(
-                    loss_name=context.optimize_graph.out_nodes['loss'],
-                    build_strategy=build_strategy)
-
-        if isinstance(context.train_reader, Variable) or (
-                isinstance(context.train_reader, DataLoaderBase) and
-            (not context.train_reader.iterable)):
-            context.train_reader.start()
-            try:
-                while True:
-
-                    for strategy in self.strategies:
-                        strategy.on_batch_begin(context)
-                    results = executor.run(context.optimize_graph,
-                                           context.scope)
-                    results = [float(np.mean(result)) for result in results]
-                    if context.batch_id % self.log_period == 0:
-                        _logger.info("epoch:{}; batch_id:{}; {} = {}".format(
-                            context.epoch_id, context.batch_id,
-                            context.optimize_graph.out_nodes.keys(
-                            ), [round(r, 6) for r in results]))
-                    for strategy in self.strategies:
-                        strategy.on_batch_end(context)
-                    context.batch_id += 1
-
-            except EOFException:
-                context.train_reader.reset()
-
-        else:
-            for data in context.train_reader():
-                for strategy in self.strategies:
-                    strategy.on_batch_begin(context)
-                results = executor.run(context.optimize_graph,
-                                       context.scope,
-                                       data=data)
-                results = [float(np.mean(result)) for result in results]
-                if context.batch_id % self.log_period == 0:
-                    _logger.info("epoch:{}; batch_id:{}; {} = {}".format(
-                        context.epoch_id, context.batch_id,
-                        context.optimize_graph.out_nodes.keys(
-                        ), [round(r, 6) for r in results]))
-                for strategy in self.strategies:
-                    strategy.on_batch_end(context)
-                context.batch_id += 1
-        context.batch_id = 0
-
-    def _eval(self, context):
-        """
-        Runing evaluation.
-        """
-        if self.eval_func is not None:
-            for key in self.eval_func:
-                func = self.eval_func[key]
-                if key not in context.eval_results:
-                    context.eval_results[key] = []
-                context.eval_results[key].append(
-                    func(self.eval_graph.program, self.scope))
-        else:
-            results, names = context.run_eval_graph()
-            for name, result in zip(names, results):
-                if name not in context.eval_results:
-                    context.eval_results[name] = []
-                context.eval_results[name].append(result)
-
-    def run(self):
-        """
-        Execute compressing pass.
-        """
-        context = Context(
-            place=self.place,
-            scope=self.scope,
-            train_graph=self.train_graph,
-            train_reader=self.train_reader,
-            eval_graph=self.eval_graph,
-            eval_reader=self.eval_reader,
-            teacher_graphs=self.teacher_graphs,
-            train_optimizer=self.train_optimizer,
-            distiller_optimizer=self.distiller_optimizer,
-            search_space=self.search_space)
-        self.context = context
-        if self.teacher_graphs:
-            context.put('teachers', self.teacher_graphs)
-        self._init_model(context)
-        if not context.optimize_graph:
-            if context.train_optimizer:
-                context.train_optimizer._name = 'train_opt'
-                context.optimize_graph = context.train_graph.get_optimize_graph(
-                    context.train_optimizer, context.place, context.scope)
-            else:
-                context.optimize_graph = context.train_graph
-
-        context, self.strategies = self._load_checkpoint(context)
-
-        for strategy in self.strategies:
-            strategy.on_compression_begin(context)
-        if 'MKLDNNPostTrainingQuantStrategy' in [
-                i.__class__.__name__ for i in self.strategies
-        ]:
-            return None
-        start = context.epoch_id
-        for epoch in range(start, self.epoch):
-            context.epoch_id = epoch
-            try:
-                for strategy in self.strategies:
-                    strategy.on_epoch_begin(context)
-                self._train_one_epoch(context)
-                if self.eval_epoch and epoch % self.eval_epoch == 0:
-                    self._eval(context)
-                self._save_checkpoint(context)
-                for strategy in self.strategies:
-                    strategy.on_epoch_end(context)
-            except Exception:
-                _logger.error(traceback.print_exc())
-                continue
-        for strategy in self.strategies:
-            strategy.on_compression_end(context)
-        return context.eval_graph
--- a/python/paddle/fluid/contrib/slim/core/config.py
+++ b/python/paddle/fluid/contrib/slim/core/config.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import inspect
-import funcsigs
-import yaml
-from collections import OrderedDict
-from ..prune import *
-from ..quantization import *
-from .strategy import *
-from ..distillation import *
-from ..searcher import *
-from ..nas import *
-
-__all__ = ['ConfigFactory']
-"""This factory is used to create instances by loading and parsing configure file with yaml format.
-"""
-
-PLUGINS = ['pruners', 'quantizers', 'distillers', 'strategies', 'controllers']
-
-
-class ConfigFactory(object):
-    def __init__(self, config):
-        """Init a factory from configure file."""
-        self.instances = {}
-        self.compressor = {}
-        self.version = None
-        self._parse_config(config)
-
-    def instance(self, name):
-        """
-        Get instance from factory.
-        """
-        if name in self.instances:
-            return self.instances[name]
-        else:
-            return None
-
-    def _new_instance(self, name, attrs):
-        if name not in self.instances:
-            class_ = globals()[attrs['class']]
-            sig = funcsigs.signature(class_.__init__)
-            keys = [
-                param.name for param in sig.parameters.values()
-                if (param.kind == param.POSITIONAL_OR_KEYWORD)
-            ][1:]
-            keys = set(attrs.keys()).intersection(set(keys))
-            args = {}
-            for key in keys:
-                value = attrs[key]
-                if isinstance(value, str) and value.lower() == 'none':
-                    value = None
-                if isinstance(value, str) and value in self.instances:
-                    value = self.instances[value]
-                if isinstance(value, list):
-                    for i in range(len(value)):
-                        if isinstance(value[i],
-                                      str) and value[i] in self.instances:
-                            value[i] = self.instances[value[i]]
-
-                args[key] = value
-            self.instances[name] = class_(**args)
-        return self.instances.get(name)
-
-    def _parse_config(self, config):
-        assert config
-        with open(config, 'r') as config_file:
-            key_values = self._ordered_load(config_file)
-            for key in key_values:
-                # parse version
-                if key == 'version' and self.version is None:
-                    self.version = int(key_values['version'])
-                    assert self.version == int(key_values['version'])
-
-                # parse pruners
-                if key in PLUGINS:
-                    instances = key_values[key]
-                    for name in instances:
-                        self._new_instance(name, instances[name])
-
-                if key == 'compressor':
-                    self.compressor['strategies'] = []
-                    self.compressor['epoch'] = key_values[key]['epoch']
-                    if 'init_model' in key_values[key]:
-                        self.compressor['init_model'] = key_values[key][
-                            'init_model']
-                    if 'checkpoint_path' in key_values[key]:
-                        self.compressor['checkpoint_path'] = key_values[key][
-                            'checkpoint_path']
-                    if 'eval_epoch' in key_values[key]:
-                        self.compressor['eval_epoch'] = key_values[key][
-                            'eval_epoch']
-                    if 'strategies' in key_values[key]:
-                        for name in key_values[key]['strategies']:
-                            strategy = self.instance(name)
-                            self.compressor['strategies'].append(strategy)
-
-                if key == 'include':
-                    for config_file in key_values[key]:
-                        self._parse_config(config_file.strip())
-
-    def _ordered_load(self,
-                      stream,
-                      Loader=yaml.Loader,
-                      object_pairs_hook=OrderedDict):
-        """
-        See: https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
-        """
-
-        class OrderedLoader(Loader):
-            pass
-
-        def construct_mapping(loader, node):
-            loader.flatten_mapping(node)
-            return object_pairs_hook(loader.construct_pairs(node))
-
-        OrderedLoader.add_constructor(
-            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping)
-        return yaml.load(stream, OrderedLoader)
--- a/python/paddle/fluid/contrib/slim/core/strategy.py
+++ b/python/paddle/fluid/contrib/slim/core/strategy.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-__all__ = ['Strategy']
-
-
-class Strategy(object):
-    """
-    Base class for all strategies.
-    """
-
-    def __init__(self, start_epoch=0, end_epoch=0):
-        """
-        Args:
-            start_epoch: The first epoch to apply the strategy.
-            end_epoch: The last epoch to apply the strategy.
-        """
-        self.start_epoch = start_epoch
-        self.end_epoch = end_epoch
-
-    def __getstate__(self):
-        d = {}
-        for key in self.__dict__:
-            if key not in ["start_epoch", "end_epoch"]:
-                d[key] = self.__dict__[key]
-        return d
-
-    def on_compression_begin(self, context):
-        pass
-
-    def on_epoch_begin(self, context):
-        pass
-
-    def on_epoch_end(self, context):
-        pass
-
-    def on_batch_begin(self, context):
-        pass
-
-    def on_batch_end(self, context):
-        pass
-
-    def on_compression_end(self, context):
-        pass
-
-    def restore_from_checkpoint(self, context):
-        pass
--- a/python/paddle/fluid/contrib/slim/distillation/__init__.py
+++ b/python/paddle/fluid/contrib/slim/distillation/__init__.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import distiller
-from .distiller import *
-from . import distillation_strategy
-from .distillation_strategy import *
-
-__all__ = distiller.__all__
-__all__ += distillation_strategy.__all__
--- a/python/paddle/fluid/contrib/slim/distillation/distillation_strategy.py
+++ b/python/paddle/fluid/contrib/slim/distillation/distillation_strategy.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ..core.strategy import Strategy
-from ....framework import Program, Variable, program_guard
-from ....log_helper import get_logger
-from .... import Executor
-import logging
-
-__all__ = ['DistillationStrategy']
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-class DistillationStrategy(Strategy):
-    def __init__(self, distillers=None, start_epoch=0, end_epoch=0):
-        """
-        Args:
-            distillers(list): A list of distiller used to combine student graph and teacher graph
-                              by adding some loss.
-            start_epoch(int): The epoch when to merge student graph and teacher graph for
-                              distillation training. default: 0
-            end_epoch(int): The epoch when to finish distillation training. default: 0
-            
-        """
-        super(DistillationStrategy, self).__init__(start_epoch, end_epoch)
-        self.distillers = distillers
-
-    def restore_from_checkpoint(self, context):
-        # load from checkpoint
-        if context.epoch_id > 0:
-            if context.epoch_id > self.start_epoch and context.epoch_id < self.end_epoch:
-                _logger.info('Restore DistillationStrategy')
-                self._create_distillation_graph(context)
-                _logger.info('Restore DistillationStrategy finish.')
-
-    def on_epoch_begin(self, context):
-        if self.start_epoch == context.epoch_id:
-            _logger.info('DistillationStrategy::on_epoch_begin.')
-            self._create_distillation_graph(context)
-            _logger.info('DistillationStrategy set optimize_graph.')
-
-    def _create_distillation_graph(self, context):
-        """
-        step 1: Merge student graph and teacher graph into distillation graph.
-        step 2: Add loss into distillation graph by distillers.
-        step 3: Append backward ops and optimize ops into distillation graph for training.
-        """
-        # step 1
-        teacher = context.teacher_graphs[0]
-        for var in teacher.program.list_vars():
-            var.stop_gradient = True
-        graph = context.train_graph.clone()
-        graph.merge(teacher)
-        if 'loss' in graph.out_nodes:
-            graph.out_nodes['student_loss'] = graph.out_nodes['loss']
-
-        # step 2
-        for distiller in self.distillers:
-            graph = distiller.distiller_loss(graph)
-
-        # step 3
-        startup_program = Program()
-        with program_guard(graph.program, startup_program):
-            context.distiller_optimizer._name = 'distillation_optimizer'
-
-            # The learning rate variable may be created in other program.
-            # Update information in optimizer to make
-            # learning rate variable being accessible in current program.
-            optimizer = context.distiller_optimizer
-            if isinstance(optimizer._learning_rate, Variable):
-                optimizer._learning_rate_map[
-                    graph.program] = optimizer._learning_rate
-
-            optimizer.minimize(graph.var(graph.out_nodes['loss'])._var)
-
-        exe = Executor(context.place)
-        exe.run(startup_program, scope=context.scope)
-
-        # backup graph for fine-tune after distillation
-        context.put('distillation_backup_optimize_graph',
-                    context.optimize_graph)
-        context.optimize_graph = graph
-
-    def on_epoch_end(self, context):
-        if context.epoch_id == (self.end_epoch - 1):
-            _logger.info('DistillationStrategy::on_epoch_end.')
-            # restore optimize_graph for fine-tune or other strategy in next stage.
-            context.optimize_graph = context.get(
-                'distillation_backup_optimize_graph')
-            _logger.info(
-                'DistillationStrategy set context.optimize_graph to None.')
--- a/python/paddle/fluid/contrib/slim/distillation/distiller.py
+++ b/python/paddle/fluid/contrib/slim/distillation/distiller.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .... import layers
-from .... import optimizer
-from .... import Executor
-from .... import Program
-from .... import program_guard
-from .... import regularizer
-
-__all__ = ['FSPDistiller', 'L2Distiller', 'SoftLabelDistiller']
-
-
-class L2Distiller(object):
-    """
-    Combine two layers from student net and teacher net by l2-loss.
-    And add the loss into the total loss using for distillation training.
-    """
-
-    def __init__(self,
-                 student_feature_map,
-                 teacher_feature_map,
-                 distillation_loss_weight=1):
-        """
-        Args:
-            student_feature_map(str): The name of feature map from student network.
-            teacher_feature_map(str): The name of feature map from teacher network.
-                                      It's shape should be the same with student network.
-            distillation_loss_weight(float): The weight of the l2-loss.
-        """
-        self.student_feature_map = student_feature_map
-        self.teacher_feature_map = teacher_feature_map
-        self.distillation_loss_weight = distillation_loss_weight
-
-    def distiller_loss(self, graph):
-        """
-        Modify graph inplace to add l2-loss.
-        Args: 
-            graph(GraphWrapper): The graph to be modified.
-        Returns:
-            GraphWrapper: The modified graph.
-        """
-        distiller_pass = L2DistillerPass(self.student_feature_map,
-                                         self.teacher_feature_map,
-                                         self.distillation_loss_weight)
-        dis_graph = distiller_pass.apply(graph)
-        return dis_graph
-
-
-class L2DistillerPass(object):
-    """
-    The pass used to add l2-loss.
-    """
-
-    def __init__(self,
-                 student_feature_map,
-                 teacher_feature_map,
-                 distillation_loss_weight=1):
-        """
-        Args:
-            student_feature_map(str): The name of feature map from student network.
-            teacher_feature_map(str): The name of feature map from teacher network.
-                                      It's shape should be the same with student network.
-            distillation_loss_weight(float): The weight of the l2-loss.
-        """
-        self.student_feature_map = student_feature_map
-        self.teacher_feature_map = teacher_feature_map
-        self.distillation_loss_weight = distillation_loss_weight
-
-    def apply(self, graph):
-        ret_graph = graph
-        with program_guard(ret_graph.program):
-
-            student_feature_map = ret_graph.var(self.student_feature_map)._var
-            teacher_feature_map = ret_graph.var(self.teacher_feature_map)._var
-            l2loss = layers.reduce_mean(
-                layers.square(student_feature_map - teacher_feature_map))
-
-            distillation_loss = l2loss * self.distillation_loss_weight
-            student_loss = 0
-            if 'loss' in ret_graph.out_nodes:
-                student_loss = ret_graph.var(ret_graph.out_nodes['loss'])._var
-            loss = distillation_loss + student_loss
-
-            ret_graph.out_nodes['loss'] = loss.name
-            ret_graph.out_nodes[
-                'l2loss_' + self.student_feature_map + "_" +
-                self.teacher_feature_map] = distillation_loss.name
-        return ret_graph
-
-
-class FSPDistiller(object):
-    """
-    Combine layers from student net and teacher net by fsp-loss.
-    """
-
-    def __init__(self, student_pairs, teacher_pairs,
-                 distillation_loss_weight=1):
-        """
-        Args:
-            student_pairs(list<tuple>): Each tuple, with two variable names, in student_pairs indicates
-                                        a section in student network. The variables in a tuple should
-                                        have the same feature map size.
-            teacher_pairs(list<tuple>): Each tuple, with two variable names, in teacher_pairs indicates
-                                        a section in teacher network. The variables in a tuple should
-                                        have the same feature map size. Varibale named teacher_pairs[i][j]
-                                        should has the save channel number with that of variable named 
-                                        student_pairs[i][j].
-
-            distillation_loss_weight(float): The weight of the fsp-loss. default: 1.
-        """
-        self.student_pairs = student_pairs
-        self.teacher_pairs = teacher_pairs
-        self.distillation_loss_weight = distillation_loss_weight
-
-    def distiller_loss(self, graph):
-        """
-        Modify graph inplace to add fsp-loss.
-        Args: 
-            graph(GraphWrapper): The graph to be modified.
-        Returns:
-            GraphWrapper: The modified graph.
-        """
-        distiller_pass = FSPDistillerPass(self.student_pairs,
-                                          self.teacher_pairs,
-                                          self.distillation_loss_weight)
-        dis_graph = distiller_pass.apply(graph)
-        return dis_graph
-
-
-class FSPDistillerPass(object):
-    '''
-    Combine layers from student net and teacher net by fsp-loss.
-    '''
-
-    def __init__(self, s_pairs, t_pairs, distillation_loss_weight=1):
-        """
-        Args:
-            s_pairs(list<tuple>): Each tuple, with two variable names, in student_pairs indicates
-                                        a section in student network. The variables in a tuple should
-                                        have the same feature map size.
-            t_pairs(list<tuple>): Each tuple, with two variable names, in teacher_pairs indicates
-                                        a section in teacher network. The variables in a tuple should
-                                        have the same feature map size. Varibale named teacher_pairs[i][j]
-                                        should has the save channel number with that of variable named 
-                                        student_pairs[i][j].
-
-            distillation_loss_weight(float): The weight of the fsp-loss. default: 1.
-        """
-        self.s_pairs = s_pairs
-        self.t_pairs = t_pairs
-        self.distillation_loss_weight = distillation_loss_weight
-
-    def apply(self, graph):
-        ret_graph = graph
-        with program_guard(ret_graph.program):
-            losses = []
-            for s_pair, t_pair in zip(self.s_pairs, self.t_pairs):
-                s_pair_start = ret_graph.var(s_pair[0])._var
-                s_pair_end = ret_graph.var(s_pair[1])._var
-                s_fsp_matrix = self._fsp_matrix(s_pair_start, s_pair_end)
-                t_pair_start = ret_graph.var(t_pair[0])._var
-                t_pair_end = ret_graph.var(t_pair[1])._var
-                t_fsp_matrix = self._fsp_matrix(t_pair_start, t_pair_end)
-                l2_loss = layers.reduce_mean(
-                    layers.square(s_fsp_matrix - t_fsp_matrix))
-                losses.append(l2_loss)
-            distillation_loss = layers.sum(
-                losses) * self.distillation_loss_weight
-            student_loss = 0
-            if 'loss' in ret_graph.out_nodes:
-                student_loss = ret_graph.var(ret_graph.out_nodes['loss'])._var
-            loss = distillation_loss + student_loss
-
-            ret_graph.out_nodes['loss'] = loss.name
-            ret_graph.out_nodes[
-                'fsp_distillation_loss'] = distillation_loss.name
-        return ret_graph
-
-    def _fsp_matrix(self, fea_map_0, fea_map_1):
-        return layers.fsp_matrix(fea_map_0, fea_map_1)
-
-
-class SoftLabelDistiller(object):
-    """
-    Combine two layers from student net and teacher net by softmax_with_cross_entropy loss.
-    And add the loss into the total loss using for distillation training.
-    """
-
-    def __init__(self,
-                 student_feature_map=None,
-                 teacher_feature_map=None,
-                 student_temperature=1.0,
-                 teacher_temperature=1.0,
-                 distillation_loss_weight=1):
-        """
-        Args:
-            student_feature_map(str): The name of feature map from student network.
-            teacher_feature_map(str): The name of feature map from teacher network.
-                                      It's shape should be the same with student network.
-            student_temperature(float): Temperature used to divide student_feature_map before softmax_with_cross_entropy. default: 1.0
-            teacher_temperature(float): Temperature used to divide teacher_feature_map before softmax_with_cross_entropy. default: 1.0
-            distillation_loss_weight(float): The weight of the l2-loss.
-        """
-
-        self.student_feature_map = student_feature_map
-        self.teacher_feature_map = teacher_feature_map
-        self.distillation_loss_weight = distillation_loss_weight
-        self.student_temperature = student_temperature
-        self.teacher_temperature = teacher_temperature
-
-    def distiller_loss(self, graph):
-        """
-        Modify graph inplace to add softmax_with_cross_entropy loss.
-        Args: 
-            graph(GraphWrapper): The graph to be modified.
-        Returns:
-            GraphWrapper: The modified graph.
-        """
-        distiller_pass = SoftLabelDistillerPass(
-            self.student_feature_map, self.teacher_feature_map,
-            self.student_temperature, self.teacher_temperature,
-            self.distillation_loss_weight)
-        dis_graph = distiller_pass.apply(graph)
-        return dis_graph
-
-
-class SoftLabelDistillerPass(object):
-    def __init__(self,
-                 student_feature_map,
-                 teacher_feature_map,
-                 student_temperature,
-                 teacher_temperature,
-                 distillation_loss_weight=1):
-        """
-        Args:
-            student_feature_map(str): The name of feature map from student network.
-            teacher_feature_map(str): The name of feature map from teacher network.
-                                      It's shape should be the same with student network.
-            student_temperature(float): Temperature used to divide student_feature_map before softmax_with_cross_entropy.
-            teacher_temperature(float): Temperature used to divide teacher_feature_map before softmax_with_cross_entropy.
-            distillation_loss_weight(float): The weight of the l2-loss.
-        """
-        self.student_feature_map = student_feature_map
-        self.teacher_feature_map = teacher_feature_map
-        self.student_temperature = student_temperature
-        self.teacher_temperature = teacher_temperature
-        self.distillation_loss_weight = distillation_loss_weight
-
-    def apply(self, graph):
-        ret_graph = graph
-        with program_guard(ret_graph.program):
-
-            student_feature_map = ret_graph.var(self.student_feature_map)._var
-            teacher_feature_map = ret_graph.var(self.teacher_feature_map)._var
-            s_fea = layers.softmax(student_feature_map /
-                                   self.student_temperature)
-            t_fea = layers.softmax(teacher_feature_map /
-                                   self.teacher_temperature)
-            t_fea.stop_gradient = True
-            ce_loss = layers.reduce_mean(
-                layers.cross_entropy(
-                    s_fea, t_fea, soft_label=True))
-            distillation_loss = ce_loss * self.distillation_loss_weight
-            student_loss = 0
-            if 'loss' in ret_graph.out_nodes:
-                student_loss = ret_graph.var(ret_graph.out_nodes['loss'])._var
-            loss = distillation_loss + student_loss
-
-            ret_graph.out_nodes['loss'] = loss.name
-            ret_graph.out_nodes[
-                'soft_label_loss_' + self.student_feature_map + "_" +
-                self.teacher_feature_map] = distillation_loss.name
-        return ret_graph
--- a/python/paddle/fluid/contrib/slim/graph/__init__.py
+++ b/python/paddle/fluid/contrib/slim/graph/__init__.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import executor
-from .executor import *
-from . import graph_wrapper
-from .graph_wrapper import *
-__all__ = executor.__all__
-__all__ += graph_wrapper.__all__
--- a/python/paddle/fluid/contrib/slim/graph/executor.py
+++ b/python/paddle/fluid/contrib/slim/graph/executor.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ....compiler import CompiledProgram
-from ....data_feeder import DataFeeder
-from .... import executor
-from .graph_wrapper import GraphWrapper
-
-__all__ = ['SlimGraphExecutor']
-
-
-class SlimGraphExecutor(object):
-    """
-    Wrapper of executor used to run GraphWrapper.
-    """
-
-    def __init__(self, place):
-        self.exe = executor.Executor(place)
-        self.place = place
-
-    def run(self, graph, scope, data=None):
-        """
-        Runing a graph with a batch of data.
-        Args:
-            graph(GraphWrapper): The graph to be executed.
-            scope(fluid.core.Scope): The scope to be used.
-            data(list<tuple>): A batch of data. Each tuple in this list is a sample.
-                               It will feed the items of tuple to the in_nodes of graph.
-        Returns:
-            results(list): A list of result with the same order indicated by graph.out_nodes.
-        """
-        assert isinstance(graph, GraphWrapper)
-        feed = None
-        if data is not None and isinstance(data[0], dict):
-            # return list = False
-            feed = data
-        elif data is not None:
-            feeder = DataFeeder(
-                feed_list=list(graph.in_nodes.values()),
-                place=self.place,
-                program=graph.program)
-            feed = feeder.feed(data)
-
-        fetch_list = list(graph.out_nodes.values())
-        program = graph.compiled_graph if graph.compiled_graph else graph.program
-        results = self.exe.run(program,
-                               scope=scope,
-                               fetch_list=fetch_list,
-                               feed=feed)
-        return results
--- a/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py
+++ b/python/paddle/fluid/contrib/slim/graph/graph_wrapper.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from collections import OrderedDict
-from .... import io
-from .... import compiler
-from ....framework import Program
-from ....framework import program_guard
-from ....framework import Parameter
-from ....framework import Variable
-from ....executor import Executor
-import copy
-from collections import Iterable
-from ....io import save_inference_model, load_inference_model, save_persistables
-import numpy as np
-import pickle
-import os
-
-__all__ = ['GraphWrapper', 'VarWrapper', 'OpWrapper']
-
-OPTIMIZER_OPS = [
-    'momentum',
-    'lars_momentum',
-    'adagrad',
-    'adam',
-    'adamax',
-    'dpsgd',
-    'decayed_adagrad',
-    'adadelta',
-    'rmsprop',
-]
-
-
-class VarWrapper(object):
-    def __init__(self, var, graph):
-        assert isinstance(var, Variable)
-        assert isinstance(graph, GraphWrapper)
-        self._var = var
-        self._graph = graph
-
-    def __eq__(self, v):
-        """
-        Overwrite this function for ...in... syntax in python.
-        """
-        return self._var.name == v._var.name
-
-    def name(self):
-        """
-        Get the name of the variable.
-        """
-        return self._var.name
-
-    def shape(self):
-        """
-        Get the shape of the variable.
-        """
-        return self._var.shape
-
-    def set_shape(self, shape):
-        """
-        Set the shape of the variable.
-        """
-        self._var.desc.set_shape(shape)
-
-    def inputs(self):
-        """
-        Get all the operators that use this variable as output.
-        Returns:
-            list<OpWrapper>: A list of operators.
-        """
-        ops = []
-        for op in self._graph.ops():
-            if self in op.all_inputs():
-                ops.append(op)
-        return ops
-
-    def outputs(self):
-        """
-        Get all the operators that use this variable as input.
-        Returns:
-            list<OpWrapper>: A list of operators.
-        """
-        ops = []
-        for op in self._graph.ops():
-            if self in op.all_outputs():
-                ops.append(op)
-        return ops
-
-
-class OpWrapper(object):
-    def __init__(self, op, graph):
-        assert isinstance(graph, GraphWrapper)
-        self._op = op
-        self._graph = graph
-
-    def __eq__(self, op):
-        """
-        Overwrite this function for ...in... syntax in python.
-        """
-        return self.idx() == op.idx()
-
-    def all_inputs(self):
-        """
-        Get all the input variables of this operator.
-        """
-        return [
-            self._graph.var(var_name) for var_name in self._op.input_arg_names
-        ]
-
-    def all_outputs(self):
-        """
-        Get all the output variables of this operator.
-        """
-        return [
-            self._graph.var(var_name) for var_name in self._op.output_arg_names
-        ]
-
-    def idx(self):
-        """
-        Get the id of this operator.
-        """
-        return self._op.idx
-
-    def type(self):
-        """
-        Get the type of this operator.
-        """
-        return self._op.type
-
-    def is_bwd_op(self):
-        """
-        Whether this operator is backward op.
-        """
-        return self.type().endswith('_grad')
-
-    def is_opt_op(self):
-        """
-        Whether this operator is optimizer op.
-        """
-        return self.type() in OPTIMIZER_OPS
-
-    def inputs(self, name):
-        """
-        Get all the variables by the input name.
-        """
-        return [self._graph.var(var_name) for var_name in self._op.input(name)]
-
-    def outputs(self, name):
-        """
-        Get all the variables by the output name.
-        """
-        return [self._graph.var(var_name) for var_name in self._op.output(name)]
-
-    def set_attr(self, key, value):
-        """
-        Set the value of attribute by attribute's name.
-
-        Args:
-            key(str): the attribute name.
-            value(bool|int|str|float|list): the value of the attribute.
-        """
-        self._op._set_attr(key, value)
-
-    def attr(self, name):
-        """
-        Get the attribute by name.
-
-        Args:
-            name(str): the attribute name.
-
-        Returns:
-            bool|int|str|float|list: The attribute value. The return value
-            can be any valid attribute type.
-        """
-        return self._op.attr(name)
-
-
-class GraphWrapper(object):
-    """
-    It is a wrapper of paddle.fluid.framework.IrGraph with some special functions
-    for paddle slim framework.
-    """
-
-    def __init__(self, program=None, in_nodes=[], out_nodes=[]):
-        """
-        Args:
-            program(framework.Program): A program with 
-            in_nodes(dict): A dict to indicate the input nodes of the graph.
-                            The key is user-defined and human-readable name.
-                            The value is the name of Variable.
-            out_nodes(dict): A dict to indicate the input nodes of the graph.
-                            The key is user-defined and human-readable name.
-                            The value is the name of Variable.
-        """
-        super(GraphWrapper, self).__init__()
-        self.program = Program() if program is None else program
-        self.persistables = {}
-        self.teacher_persistables = {}
-        for var in self.program.list_vars():
-            if var.persistable:
-                self.persistables[var.name] = var
-        self.compiled_graph = None
-        in_nodes = [] if in_nodes is None else in_nodes
-        out_nodes = [] if out_nodes is None else out_nodes
-        self.in_nodes = OrderedDict(in_nodes)
-        self.out_nodes = OrderedDict(out_nodes)
-        self._attrs = OrderedDict()
-
-    def all_parameters(self):
-        """
-        Get all the parameters in this graph.
-        Returns:
-            list<VarWrapper>: A list of VarWrapper instances.
-        """
-        params = []
-        for block in self.program.blocks:
-            for param in block.all_parameters():
-                params.append(VarWrapper(param, self))
-        return params
-
-    def is_parameter(self, var):
-        """
-        Whether the given variable is parameter.
-        Args:
-            var(VarWrapper): The given variable.
-        """
-        return isinstance(var._var, Parameter)
-
-    def is_persistable(self, var):
-        """
-        Whether the given variable is persistable.
-        Args:
-            var(VarWrapper): The given variable.
-        """
-        return var._var.persistable
-
-    def compile(self, for_parallel=True, for_test=False, mem_opt=False):
-        """
-        Compile the program in this wrapper to framework.CompiledProgram for next running.
-        This function must be called if the program is modified.
-        Args:
-            for_parallel(bool): Whether the program to run in data parallel way. default: True.
-            for_test(bool): Whether the compiled program is used for test.
-        """
-        target = self.program
-        if for_test:
-            loss = None
-        else:
-            loss = self.out_nodes['loss']
-        if for_parallel:
-            # disable memory optimize for stable training
-            build_strategy = compiler.BuildStrategy()
-            build_strategy.enable_inplace = mem_opt
-            build_strategy.memory_optimize = mem_opt
-            build_strategy.fuse_all_reduce_ops = False
-            #            build_strategy.async_mode = False
-            self.compiled_graph = compiler.CompiledProgram(
-                target).with_data_parallel(
-                    loss_name=loss, build_strategy=build_strategy)
-        else:
-            self.compiled_graph = compiler.CompiledProgram(target)
-
-    def ops(self):
-        """
-        Return all operator nodes included in the graph as a set.
-        """
-        ops = []
-        for block in self.program.blocks:
-            for op in block.ops:
-                ops.append(OpWrapper(op, self))
-        return ops
-
-    def vars(self):
-        """
-        Get all the variables.
-        """
-        return [VarWrapper(var, self) for var in self.program.list_vars()]
-
-    def var(self, name):
-        """
-        Get the variable by variable name.
-        """
-        return VarWrapper(self.program.global_block().var(name), self)
-
-    def clone(self, for_test=False):
-        """
-        Clone a new graph from current graph.
-        Returns:
-            (GraphWrapper): The wrapper of a new graph.
-        """
-        return GraphWrapper(
-            self.program.clone(for_test),
-            copy.deepcopy(self.in_nodes), copy.deepcopy(self.out_nodes))
-
-    def merge(self, graph):
-        """
-        Merge a graph into current graph.
-        Args:
-            graph(GraphWrapper): The graph to be merged by current graph.
-        """
-        for var in graph.program.list_vars():
-            if var.persistable:
-                self.teacher_persistables[var.name] = var
-            new_var = self.program.global_block()._clone_variable(
-                var, force_persistable=False)
-            new_var.stop_gradient = var.stop_gradient
-            # TODO: parameters should be cloned
-        for op in graph.ops():
-            op = op._op
-            inputs = {}
-            outputs = {}
-            attrs = {}
-            for input_name in op.input_names:
-                inputs[input_name] = [
-                    self.var(in_var_name)._var
-                    for in_var_name in op.input(input_name)
-                ]
-            for output_name in op.output_names:
-                outputs[output_name] = [
-                    self.var(out_var_name)._var
-                    for out_var_name in op.output(output_name)
-                ]
-            for attr_name in op.attr_names:
-                attrs[attr_name] = op.attr(attr_name)
-            self.program.global_block().append_op(
-                type=op.type, inputs=inputs, outputs=outputs, attrs=attrs)
-
-    def program(self):
-        """
-        Get the program in current wrapper.
-        """
-        return self.program
-
-    def pre_ops(self, op):
-        """
-        Get all the previous operators of target operator.
-        Args:
-            op(OpWrapper): Target operator..
-        Returns:
-            list<OpWrapper>: A list of operators.
-        """
-        ops = []
-        for p in self.ops():
-            for in_var in op.all_inputs():
-                if in_var in p.all_outputs():
-                    ops.append(p)
-        return ops
-
-    def next_ops(self, op):
-        """
-        Get all the next operators of target operator.
-        Args:
-            op(OpWrapper): Target operator..
-        Returns:
-            list<OpWrapper>: A list of operators.
-        """
-        ops = []
-        for p in self.ops():
-            for out_var in op.all_outputs():
-                if out_var in p.all_inputs():
-                    ops.append(p)
-        return ops
-
-    def get_param_by_op(self, op):
-        """
-        Get the parameters used by target operator.
-        """
-        assert isinstance(op, OpWrapper)
-        params = []
-        for var in op.all_inputs():
-            if isinstance(var._var, Parameter):
-                params.append(var)
-        assert len(params) > 0
-        return params
-
-    def numel_params(self):
-        """
-        Get the number of elements in all parameters.
-        """
-        ret = 0
-        for param in self.all_parameters():
-            ret += np.product(param.shape())
-        return ret
-
-    def get_optimize_graph(self, optimizer, place, scope, no_grad_var_names=[]):
-        """
-        Get a new graph for training by appending some backward operators and optimization operators.
-        Args:
-            optimizer: The optimizer used to generate training graph.
-            place: The place to run the graph.
-            scope: The scope used to run the graph. Some new variable will be added into this scope.
-            no_grad_var_names(list<str>): Names of variables that should be ignored while computing gradients. default: [].
-        Returns:
-            (GraphWrapper): The wrapper of new graph with backward ops and optimization ops. 
-        """
-        graph = self.clone()
-        startup_program = Program()
-        with program_guard(
-                main_program=graph.program, startup_program=startup_program):
-            target_name = None
-            if 'loss' in graph.out_nodes:
-                target_name = graph.out_nodes['loss']
-            elif 'cost' in graph.out_nodes:
-                target_name = graph.out_nodes['cost']
-            else:
-                return None
-            target = graph.var(target_name)._var
-            # The learning rate variable may be created in other program.
-            # Update information in optimizer to make
-            # learning rate variable being accessible in current program.
-            if isinstance(optimizer._learning_rate, Variable):
-                optimizer._learning_rate_map[
-                    graph.program] = optimizer._learning_rate
-            optimizer.minimize(target, no_grad_set=no_grad_var_names)
-
-        exe = Executor(place)
-        exe.run(program=startup_program, scope=scope)
-        return graph
-
-    def flops(self, only_conv=False):
-        """
-        Get the flops of current graph.
-        Args:
-            only_conv: Only calculating the conv layers. default: False.
-        Returns:
-            int: The flops of current graph.
-        """
-        flops = 0
-        for op in self.ops():
-            if op.type() in ['conv2d', 'depthwise_conv2d']:
-                filter_shape = op.inputs("Filter")[0].shape()
-                input_shape = op.inputs("Input")[0].shape()
-                output_shape = op.outputs("Output")[0].shape()
-                c_out, c_in, k_h, k_w = filter_shape
-                _, _, h_out, w_out = output_shape
-                groups = op.attr("groups")
-                kernel_ops = k_h * k_w * (c_in / groups)
-                if len(op.inputs("Bias")) > 0:
-                    with_bias = 1
-                else:
-                    with_bias = 0
-                flops += 2 * h_out * w_out * c_out * (kernel_ops + with_bias)
-            elif op.type() == 'pool2d' and not only_conv:
-                input_shape = op.inputs("X")[0].shape()
-                output_shape = op.outputs("Out")[0].shape()
-                _, c_out, h_out, w_out = output_shape
-                k_size = op.attr("ksize")
-                flops += h_out * w_out * c_out * (k_size[0]**2)
-
-            elif op.type() == 'mul' and not only_conv:
-                x_shape = list(op.inputs("X")[0].shape())
-                y_shape = op.inputs("Y")[0].shape()
-                if x_shape[0] == -1:
-                    x_shape[0] = 1
-                flops += 2 * x_shape[0] * x_shape[1] * y_shape[1]
-
-            elif op.type() in ['relu', 'sigmoid', 'batch_norm'
-                               ] and not only_conv:
-                input_shape = list(op.inputs("X")[0].shape())
-                if input_shape[0] == -1:
-                    input_shape[0] = 1
-                flops += np.product(input_shape)
-
-        return flops
-
-    def save_model(self, path, exe):
-        """
-        Save network and parameters into file which can be load by load_inference_model api.
-        Args:
-            path(str): The path to save the persistables.
-            exe(framework.Executor): The executor used to save the persistables.
-        """
-        out_vars = [
-            self.var(var_name)._var for var_name in self.out_nodes.values()
-        ]
-        in_vars = list(self.in_nodes.values())
-        assert (len(in_vars) > 0)
-        assert (len(out_vars) > 0)
-        io.save_inference_model(
-            path,
-            in_vars,
-            out_vars,
-            exe.exe,
-            model_filename="__model__",
-            params_filename="__params__",
-            main_program=self.program.clone(),
-            export_for_deployment=True)
-
-    def save_infer_model(self, path, exe, in_out, program_only=False):
-        """
-        Save network and parameters into file which can be load by load_inference_model api.
-        Args:
-            path(str): The path to save the persistables.
-            exe(framework.Executor): The executor used to save the persistables.
-            in_out(tuple|list): in_out[0] is a list of input nodes' names
-            and in_out[1] is a list of output nodes' names.
-            program_only(bool): Whether to save program only.
-        """
-        out_vars = [self.var(var_name)._var for var_name in in_out[1]]
-        in_vars = list(in_out[0])
-        assert (len(in_vars) > 0)
-        assert (len(out_vars) > 0)
-        io.save_inference_model(
-            path,
-            in_vars,
-            out_vars,
-            exe.exe,
-            model_filename="__model__.infer",
-            params_filename="__params__",
-            program_only=program_only,
-            main_program=self.program.clone(),
-            export_for_deployment=True)
-
-    def save_persistables(self, path, exe):
-        """
-        Save all the persistable variables into file.
-        Args:
-            path(str): The path to save the persistables.
-            exe(framework.Executor): The executor used to save the persistables.
-        """
-        # update persistables from program
-        for var in self.program.list_vars():
-            if var.persistable and var.name not in self.persistables:
-                self.persistables[var.name] = var
-        persistables = []
-        for var in self.persistables:
-            if 'reader' not in var and 'double_buffer' not in var and var not in self.teacher_persistables:
-                persistables.append(self.persistables[var])
-
-        io.save_vars(exe.exe, path, vars=persistables)
-
-    def load_persistables(self, path, exe):
-        """
-        Load the persistable variables from file.
-        Args:
-            path(str): The path to load the persistables.
-            exe(framework.Executor): The executor used to load the persistables.
-        """
-
-        def if_exist(var):
-            return os.path.exists(os.path.join(path, var.name))
-
-        persistables = []
-        for var in self.persistables:
-            if 'reader' not in var and 'double_buffer' not in var:
-                persistables.append(self.persistables[var])
-        io.load_vars(exe.exe, path, vars=persistables, predicate=if_exist)
-
-    def update_param_shape(self, scope):
-        """
-        Update the shape of parameters in the graph according to tensors in scope.
-        It is used after loading pruned parameters from file.
-        """
-        for param in self.all_parameters():
-            tensor_shape = np.array(scope.find_var(param.name()).get_tensor(
-            )).shape
-            param.set_shape(tensor_shape)
-
-    def infer_shape(self):
-        """
-        Update the groups of convolution layer according to current filters.
-        It is used after loading pruned parameters from file.
-        """
-        for op in self.ops():
-            if op.type() != 'conditional_block':
-                op._op.desc.infer_shape(op._op.block.desc)
-
-    def update_groups_of_conv(self):
-        for op in self.ops():
-            if op.type() == 'depthwise_conv2d' or op.type(
-            ) == 'depthwise_conv2d_grad':
-                op.set_attr('groups', op.inputs('Filter')[0].shape()[0])
--- a/python/paddle/fluid/contrib/slim/nas/__init__.py
+++ b/python/paddle/fluid/contrib/slim/nas/__init__.py
-#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import light_nas_strategy
-from .light_nas_strategy import *
-from . import controller_server
-from .controller_server import *
-from . import search_agent
-from .search_agent import *
-from . import search_space
-from .search_space import *
-from . import lock
-from .lock import *
-
-__all__ = light_nas_strategy.__all__
-__all__ += controller_server.__all__
-__all__ += search_agent.__all__
-__all__ += search_space.__all__
--- a/python/paddle/fluid/contrib/slim/nas/controller_server.py
+++ b/python/paddle/fluid/contrib/slim/nas/controller_server.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import socket
-from threading import Thread
-from ....log_helper import get_logger
-
-__all__ = ['ControllerServer']
-
-_logger = get_logger(
-    __name__,
-    logging.INFO,
-    fmt='ControllerServer-%(asctime)s-%(levelname)s: %(message)s')
-
-
-class ControllerServer(object):
-    """
-    The controller wrapper with a socket server to handle the request of search agent.
-    """
-
-    def __init__(self,
-                 controller=None,
-                 address=('', 0),
-                 max_client_num=100,
-                 search_steps=None,
-                 key=None):
-        """
-        Args:
-            controller(slim.searcher.Controller): The controller used to generate tokens.
-            address(tuple): The address of current server binding with format (ip, port). Default: ('', 0).
-                            which means setting ip automatically
-            max_client_num(int): The maximum number of clients connecting to current server simultaneously. Default: 100.
-            search_steps(int): The total steps of searching. None means never stopping. Default: None 
-        """
-        self._controller = controller
-        self._address = address
-        self._max_client_num = max_client_num
-        self._search_steps = search_steps
-        self._closed = False
-        self._port = address[1]
-        self._ip = address[0]
-        self._key = key
-
-    def start(self):
-        self._socket_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        self._socket_server.bind(self._address)
-        self._socket_server.listen(self._max_client_num)
-        self._port = self._socket_server.getsockname()[1]
-        self._ip = self._socket_server.getsockname()[0]
-        _logger.info("listen on: [{}:{}]".format(self._ip, self._port))
-        thread = Thread(target=self.run)
-        thread.start()
-        return str(thread)
-
-    def close(self):
-        """Close the server."""
-        self._closed = True
-
-    def port(self):
-        """Get the port."""
-        return self._port
-
-    def ip(self):
-        """Get the ip."""
-        return self._ip
-
-    def run(self):
-        _logger.info("Controller Server run...")
-        while ((self._search_steps is None) or
-               (self._controller._iter <
-                (self._search_steps))) and not self._closed:
-            conn, addr = self._socket_server.accept()
-            message = conn.recv(1024).decode()
-            if message.strip("\n") == "next_tokens":
-                tokens = self._controller.next_tokens()
-                tokens = ",".join([str(token) for token in tokens])
-                conn.send(tokens.encode())
-            else:
-                _logger.info("recv message from {}: [{}]".format(addr, message))
-                messages = message.strip('\n').split("\t")
-                if (len(messages) < 3) or (messages[0] != self._key):
-                    _logger.info("recv noise from {}: [{}]".format(addr,
-                                                                   message))
-                    continue
-                tokens = messages[1]
-                reward = messages[2]
-                tokens = [int(token) for token in tokens.split(",")]
-                self._controller.update(tokens, float(reward))
-                tokens = self._controller.next_tokens()
-                tokens = ",".join([str(token) for token in tokens])
-                conn.send(tokens.encode())
-                _logger.info("send message to {}: [{}]".format(addr, tokens))
-            conn.close()
-        self._socket_server.close()
-        _logger.info("server closed!")
--- a/python/paddle/fluid/contrib/slim/nas/light_nas_strategy.py
+++ b/python/paddle/fluid/contrib/slim/nas/light_nas_strategy.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from ..core.strategy import Strategy
-from ..graph import GraphWrapper
-from .controller_server import ControllerServer
-from .search_agent import SearchAgent
-from ....executor import Executor
-from ....log_helper import get_logger
-import re
-import logging
-import functools
-import socket
-from .lock import lock, unlock
-
-__all__ = ['LightNASStrategy']
-
-_logger = get_logger(
-    __name__,
-    logging.INFO,
-    fmt='LightNASStrategy-%(asctime)s-%(levelname)s: %(message)s')
-
-
-class LightNASStrategy(Strategy):
-    """
-    Light-NAS search strategy.
-    """
-
-    def __init__(self,
-                 controller=None,
-                 end_epoch=1000,
-                 target_flops=629145600,
-                 target_latency=0,
-                 retrain_epoch=1,
-                 metric_name='top1_acc',
-                 server_ip=None,
-                 server_port=0,
-                 is_server=False,
-                 max_client_num=100,
-                 search_steps=None,
-                 key="light-nas"):
-        """
-        Args:
-            controller(searcher.Controller): The searching controller. Default: None.
-            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. Default: 0
-            target_flops(int): The constraint of FLOPS.
-            target_latency(float): The constraint of latency.
-            retrain_epoch(int): The number of training epochs before evaluating structure generated by controller. Default: 1.
-            metric_name(str): The metric used to evaluate the model.
-                         It should be one of keys in out_nodes of graph wrapper. Default: 'top1_acc'
-            server_ip(str): The ip that controller server listens on. None means getting the ip automatically. Default: None.
-            server_port(int): The port that controller server listens on. 0 means getting usable port automatically. Default: 0.
-            is_server(bool): Whether current host is controller server. Default: False.
-            max_client_num(int): The maximum number of clients that connect to controller server concurrently. Default: 100.
-            search_steps(int): The total steps of searching. Default: None.
-            key(str): The key used to identify legal agent for controller server. Default: "light-nas"
-        """
-        self.start_epoch = 0
-        self.end_epoch = end_epoch
-        self._max_flops = target_flops
-        self._max_latency = target_latency
-        self._metric_name = metric_name
-        self._controller = controller
-        self._retrain_epoch = 0
-        self._server_ip = server_ip
-        self._server_port = server_port
-        self._is_server = is_server
-        self._retrain_epoch = retrain_epoch
-        self._search_steps = search_steps
-        self._max_client_num = max_client_num
-        self._max_try_times = 100
-        self._key = key
-
-        if self._server_ip is None:
-            self._server_ip = self._get_host_ip()
-
-    def _get_host_ip(self):
-        return socket.gethostbyname(socket.gethostname())
-
-    def on_compression_begin(self, context):
-        self._current_tokens = context.search_space.init_tokens()
-        self._controller.reset(context.search_space.range_table(),
-                               self._current_tokens, None)
-
-        # create controller server
-        if self._is_server:
-            open("./slim_LightNASStrategy_controller_server.socket",
-                 'a').close()
-            socket_file = open(
-                "./slim_LightNASStrategy_controller_server.socket", 'r+')
-            lock(socket_file)
-            tid = socket_file.readline()
-            if tid == '':
-                _logger.info("start controller server...")
-                self._server = ControllerServer(
-                    controller=self._controller,
-                    address=(self._server_ip, self._server_port),
-                    max_client_num=self._max_client_num,
-                    search_steps=self._search_steps,
-                    key=self._key)
-                tid = self._server.start()
-                self._server_port = self._server.port()
-                socket_file.write(tid)
-                _logger.info("started controller server...")
-            unlock(socket_file)
-            socket_file.close()
-        _logger.info("self._server_ip: {}; self._server_port: {}".format(
-            self._server_ip, self._server_port))
-        # create client
-        self._search_agent = SearchAgent(
-            self._server_ip, self._server_port, key=self._key)
-
-    def __getstate__(self):
-        """Socket can't be pickled."""
-        d = {}
-        for key in self.__dict__:
-            if key not in ["_search_agent", "_server"]:
-                d[key] = self.__dict__[key]
-        return d
-
-    def on_epoch_begin(self, context):
-        if context.epoch_id >= self.start_epoch and context.epoch_id <= self.end_epoch and (
-                self._retrain_epoch == 0 or
-            (context.epoch_id - self.start_epoch) % self._retrain_epoch == 0):
-            _logger.info("light nas strategy on_epoch_begin")
-            min_flops = -1
-            for _ in range(self._max_try_times):
-                startup_p, train_p, test_p, _, _, train_reader, test_reader = context.search_space.create_net(
-                    self._current_tokens)
-                context.eval_graph.program = test_p
-                flops = context.eval_graph.flops()
-                if min_flops == -1:
-                    min_flops = flops
-                    min_tokens = self._current_tokens[:]
-                else:
-                    if flops < min_flops:
-                        min_tokens = self._current_tokens[:]
-                if self._max_latency > 0:
-                    latency = context.search_space.get_model_latency(test_p)
-                    _logger.info("try [{}] with latency {} flops {}".format(
-                        self._current_tokens, latency, flops))
-                else:
-                    _logger.info("try [{}] with flops {}".format(
-                        self._current_tokens, flops))
-                if flops > self._max_flops or (self._max_latency > 0 and
-                                               latency > self._max_latency):
-                    self._current_tokens = self._controller.next_tokens(
-                        min_tokens)
-                else:
-                    break
-
-            context.train_reader = train_reader
-            context.eval_reader = test_reader
-
-            exe = Executor(context.place)
-            exe.run(startup_p)
-
-            context.optimize_graph.program = train_p
-            context.optimize_graph.compile()
-
-            context.skip_training = (self._retrain_epoch == 0)
-
-    def on_epoch_end(self, context):
-        if context.epoch_id >= self.start_epoch and context.epoch_id < self.end_epoch and (
-                self._retrain_epoch == 0 or
-            (context.epoch_id - self.start_epoch + 1
-             ) % self._retrain_epoch == 0):
-
-            self._current_reward = context.eval_results[self._metric_name][-1]
-            flops = context.eval_graph.flops()
-            if flops > self._max_flops:
-                self._current_reward = 0.0
-            if self._max_latency > 0:
-                test_p = context.search_space.create_net(self._current_tokens)[
-                    2]
-                latency = context.search_space.get_model_latency(test_p)
-                if latency > self._max_latency:
-                    self._current_reward = 0.0
-                _logger.info("reward: {}; latency: {}; flops: {}; tokens: {}".
-                             format(self._current_reward, latency, flops,
-                                    self._current_tokens))
-            else:
-                _logger.info("reward: {}; flops: {}; tokens: {}".format(
-                    self._current_reward, flops, self._current_tokens))
-            self._current_tokens = self._search_agent.update(
-                self._current_tokens, self._current_reward)
--- a/python/paddle/fluid/contrib/slim/nas/lock.py
+++ b/python/paddle/fluid/contrib/slim/nas/lock.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-__All__ = ['lock', 'unlock']
-if os.name == 'nt':
-
-    def lock(file):
-        raise NotImplementedError('Windows is not supported.')
-
-    def unlock(file):
-        raise NotImplementedError('Windows is not supported.')
-
-elif os.name == 'posix':
-    from fcntl import flock, LOCK_EX, LOCK_UN
-
-    def lock(file):
-        """Lock the file in local file system."""
-        flock(file.fileno(), LOCK_EX)
-
-    def unlock(file):
-        """Unlock the file in local file system."""
-        flock(file.fileno(), LOCK_UN)
-else:
-    raise RuntimeError("File Locker only support NT and Posix platforms!")
--- a/python/paddle/fluid/contrib/slim/nas/search_agent.py
+++ b/python/paddle/fluid/contrib/slim/nas/search_agent.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import socket
-from ....log_helper import get_logger
-
-__all__ = ['SearchAgent']
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-class SearchAgent(object):
-    """
-    Search agent.
-    """
-
-    def __init__(self, server_ip=None, server_port=None, key=None):
-        """
-        Args:
-            server_ip(str): The ip that controller server listens on. None means getting the ip automatically. Default: None.
-            server_port(int): The port that controller server listens on. 0 means getting usable port automatically. Default: 0.
-            key(str): The key used to identify legal agent for controller server. Default: "light-nas"
-        """
-        self.server_ip = server_ip
-        self.server_port = server_port
-        self.socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        self._key = key
-
-    def update(self, tokens, reward):
-        """
-        Update the controller according to latest tokens and reward.
-        Args:
-            tokens(list<int>): The tokens generated in last step.
-            reward(float): The reward of tokens.
-        """
-        socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        socket_client.connect((self.server_ip, self.server_port))
-        tokens = ",".join([str(token) for token in tokens])
-        socket_client.send("{}\t{}\t{}".format(self._key, tokens, reward)
-                           .encode())
-        tokens = socket_client.recv(1024).decode()
-        tokens = [int(token) for token in tokens.strip("\n").split(",")]
-        return tokens
-
-    def next_tokens(self):
-        """
-        Get next tokens.
-        """
-        socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        socket_client.connect((self.server_ip, self.server_port))
-        socket_client.send("next_tokens".encode())
-        tokens = socket_client.recv(1024).decode()
-        tokens = [int(token) for token in tokens.strip("\n").split(",")]
-        return tokens
--- a/python/paddle/fluid/contrib/slim/nas/search_space.py
+++ b/python/paddle/fluid/contrib/slim/nas/search_space.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""The search space used to search neural architecture"""
-
-__all__ = ['SearchSpace']
-
-
-class SearchSpace(object):
-    """Controller for Neural Architecture Search.
-    """
-
-    def __init__(self, *args, **kwargs):
-        pass
-
-    def init_tokens(self):
-        """Get init tokens in search space.
-        """
-        raise NotImplementedError('Abstract method.')
-
-    def range_table(self):
-        """Get range table of current search space.
-        """
-        raise NotImplementedError('Abstract method.')
-
-    def create_net(self, tokens):
-        """Create networks for training and evaluation according to tokens.
-        Args:
-            tokens(list<int>): The tokens which represent a network.
-        Return:
-            (tuple): startup_program, train_program, evaluation_program, train_metrics, test_metrics
-        """
-        raise NotImplementedError('Abstract method.')
-
-    def get_model_latency(self, program):
-        """Get model latency according to program.
-        Args:
-            program(Program): The program to get latency.
-        Return:
-            (float): model latency.
-        """
-        raise NotImplementedError('Abstract method.')
--- a/python/paddle/fluid/contrib/slim/prune/__init__.py
+++ b/python/paddle/fluid/contrib/slim/prune/__init__.py
-#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import pruner
-from .pruner import *
-from . import prune_strategy
-from .prune_strategy import *
-from . import auto_prune_strategy
-from .auto_prune_strategy import *
-
-__all__ = pruner.__all__
-__all__ += prune_strategy.__all__
-__all__ += auto_prune_strategy.__all__
--- a/python/paddle/fluid/contrib/slim/prune/auto_prune_strategy.py
+++ b/python/paddle/fluid/contrib/slim/prune/auto_prune_strategy.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from .prune_strategy import PruneStrategy
-import re
-import logging
-import functools
-import copy
-from ....log_helper import get_logger
-
-__all__ = ['AutoPruneStrategy']
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-class AutoPruneStrategy(PruneStrategy):
-    """
-    Automatic pruning strategy.
-    """
-
-    def __init__(self,
-                 pruner=None,
-                 controller=None,
-                 start_epoch=0,
-                 end_epoch=10,
-                 min_ratio=0.5,
-                 max_ratio=0.7,
-                 metric_name='top1_acc',
-                 pruned_params='conv.*_weights',
-                 retrain_epoch=0,
-                 uniform_range=None,
-                 init_tokens=None):
-        """
-        Args:
-            pruner(slim.Pruner): The pruner used to prune the parameters. Default: None.
-            controller(searcher.Controller): The searching controller. Default: None.
-            start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. Default: 0
-            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. Default: 0
-            min_ratio(float): The maximum pruned ratio. Default: 0.7
-            max_ratio(float): The minimum pruned ratio. Default: 0.5
-            metric_name(str): The metric used to evaluate the model.
-                         It should be one of keys in out_nodes of graph wrapper. Default: 'top1_acc'
-            pruned_params(str): The pattern str to match the parameter names to be pruned. Default: 'conv.*_weights'
-            retrain_epoch(int): The training epochs in each searching step. Default: 0
-            uniform_range(int): The token range in each position of tokens generated by controller. None means getting the range automatically. Default: None.
-            init_tokens(list<int>): The initial tokens. None means getting the initial tokens automatically. Default: None.
-        """
-        super(AutoPruneStrategy, self).__init__(pruner, start_epoch, end_epoch,
-                                                0.0, metric_name, pruned_params)
-        self._max_ratio = max_ratio
-        self._min_ratio = min_ratio
-        self._controller = controller
-        self._metric_name = metric_name
-        self._pruned_param_names = []
-        self._retrain_epoch = retrain_epoch
-        self._uniform_range = uniform_range
-        self._init_tokens = init_tokens
-        self._current_tokens = None
-
-    def on_compression_begin(self, context):
-        """
-        Prepare some information for searching strategy.
-        step 1: Find all the parameters to be pruned.
-        step 2: Get initial tokens and setup controller.
-        """
-        pruned_params = []
-        for param in context.eval_graph.all_parameters():
-            if re.match(self.pruned_params, param.name()):
-                self._pruned_param_names.append(param.name())
-
-        if self._init_tokens is not None:
-            self._current_tokens = self._init_tokens
-        else:
-            self._current_tokens = self._get_init_tokens(context)
-
-        if self._uniform_range is not None:
-            self._range_table = [round(self._uniform_range, 2) / 0.01] * len(
-                self._pruned_param_names)
-        else:
-            self._range_table = copy.deepcopy(self._current_tokens)
-        _logger.info('init tokens: {}'.format(self._current_tokens))
-        _logger.info("range_table: {}".format(self._range_table))
-        constrain_func = functools.partial(
-            self._constrain_func, context=context)
-
-        self._controller.reset(self._range_table, self._current_tokens,
-                               constrain_func)
-
-    def _constrain_func(self, tokens, context=None):
-        """Check whether the tokens meet constraint."""
-        ori_flops = context.eval_graph.flops()
-        ratios = self._tokens_to_ratios(tokens)
-        params = self._pruned_param_names
-        param_shape_backup = {}
-        self._prune_parameters(
-            context.eval_graph,
-            context.scope,
-            params,
-            ratios,
-            context.place,
-            only_graph=True,
-            param_shape_backup=param_shape_backup)
-        context.eval_graph.update_groups_of_conv()
-        flops = context.eval_graph.flops()
-        for param in param_shape_backup.keys():
-            context.eval_graph.var(param).set_shape(param_shape_backup[param])
-        flops_ratio = (1 - float(flops) / ori_flops)
-        if flops_ratio >= self._min_ratio and flops_ratio <= self._max_ratio:
-            _logger.info("Success try [{}]; flops: -{}".format(tokens,
-                                                               flops_ratio))
-            return True
-        else:
-            _logger.info("Failed try [{}]; flops: -{}".format(tokens,
-                                                              flops_ratio))
-            return False
-
-    def _get_init_tokens(self, context):
-        """Get initial tokens.
-        """
-        ratios = self._get_uniform_ratios(context)
-        _logger.info('Get init ratios: {}'.format(
-            [round(r, 2) for r in ratios]))
-        return self._ratios_to_tokens(ratios)
-
-    def _ratios_to_tokens(self, ratios):
-        """Convert pruned ratios to tokens.
-        """
-        return [int(ratio / 0.01) for ratio in ratios]
-
-    def _tokens_to_ratios(self, tokens):
-        """Convert tokens to pruned ratios.
-        """
-        return [token * 0.01 for token in tokens]
-
-    def _get_uniform_ratios(self, context):
-        """
-        Search a group of uniform ratios.
-        """
-        min_ratio = 0.
-        max_ratio = 1.
-        target = (self._min_ratio + self._max_ratio) / 2
-        flops = context.eval_graph.flops()
-        model_size = context.eval_graph.numel_params()
-        ratios = None
-        while min_ratio < max_ratio:
-            ratio = (max_ratio + min_ratio) / 2
-            ratios = [ratio] * len(self._pruned_param_names)
-            param_shape_backup = {}
-            self._prune_parameters(
-                context.eval_graph,
-                context.scope,
-                self._pruned_param_names,
-                ratios,
-                context.place,
-                only_graph=True,
-                param_shape_backup=param_shape_backup)
-
-            pruned_flops = 1 - (float(context.eval_graph.flops()) / flops)
-            pruned_size = 1 - (float(context.eval_graph.numel_params()) /
-                               model_size)
-            for param in param_shape_backup.keys():
-                context.eval_graph.var(param).set_shape(param_shape_backup[
-                    param])
-
-            if abs(pruned_flops - target) < 1e-2:
-                break
-            if pruned_flops > target:
-                max_ratio = ratio
-            else:
-                min_ratio = ratio
-        _logger.info('Get ratios: {}'.format([round(r, 2) for r in ratios]))
-        return ratios
-
-    def on_epoch_begin(self, context):
-        """
-        step 1: Get a new tokens from controller.
-        step 2: Pruning eval_graph and optimize_program by tokens
-        """
-        if context.epoch_id >= self.start_epoch and context.epoch_id <= self.end_epoch and (
-                self._retrain_epoch == 0 or
-            (context.epoch_id - self.start_epoch) % self._retrain_epoch == 0):
-            _logger.info("on_epoch_begin")
-            params = self._pruned_param_names
-            ratios = self._tokens_to_ratios(self._current_tokens)
-
-            self._param_shape_backup = {}
-            self._param_backup = {}
-            self._prune_parameters(
-                context.optimize_graph,
-                context.scope,
-                params,
-                ratios,
-                context.place,
-                param_backup=self._param_backup,
-                param_shape_backup=self._param_shape_backup)
-            self._prune_graph(context.eval_graph, context.optimize_graph)
-            context.optimize_graph.update_groups_of_conv()
-            context.eval_graph.update_groups_of_conv()
-            context.optimize_graph.compile(
-                mem_opt=False)  # to update the compiled program
-            context.skip_training = (self._retrain_epoch == 0)
-
-    def on_epoch_end(self, context):
-        """
-        step 1: Get reward of current tokens and update controller.
-        step 2: Restore eval_graph and optimize_graph
-        """
-        if context.epoch_id >= self.start_epoch and context.epoch_id < self.end_epoch and (
-                self._retrain_epoch == 0 or
-            (context.epoch_id - self.start_epoch + 1
-             ) % self._retrain_epoch == 0):
-            _logger.info("on_epoch_end")
-            reward = context.eval_results[self._metric_name][-1]
-            self._controller.update(self._current_tokens, reward)
-
-            self._current_tokens = self._controller.next_tokens()
-            # restore pruned parameters
-            for param_name in self._param_backup.keys():
-                param_t = context.scope.find_var(param_name).get_tensor()
-                param_t.set(self._param_backup[param_name], context.place)
-            self._param_backup = {}
-            # restore shape of parameters
-            for param in self._param_shape_backup.keys():
-                context.optimize_graph.var(param).set_shape(
-                    self._param_shape_backup[param])
-            self._param_shape_backup = {}
-            self._prune_graph(context.eval_graph, context.optimize_graph)
-
-            context.optimize_graph.update_groups_of_conv()
-            context.eval_graph.update_groups_of_conv()
-            context.optimize_graph.compile(
-                mem_opt=False)  # to update the compiled program
-
-        elif context.epoch_id == self.end_epoch:  # restore graph for final training
-            # restore pruned parameters
-            for param_name in self._param_backup.keys():
-                param_t = context.scope.find_var(param_name).get_tensor()
-                param_t.set(self.param_backup[param_name], context.place)
-            # restore shape of parameters
-            for param in self._param_shape_backup.keys():
-                context.eval_graph.var(param).set_shape(
-                    self._param_shape_backup[param])
-                context.optimize_graph.var(param).set_shape(
-                    self._param_shape_backup[param])
-
-            context.optimize_graph.update_groups_of_conv()
-            context.eval_graph.update_groups_of_conv()
-
-            params, ratios = self._get_prune_ratios(
-                self._controller._best_tokens)
-            self._prune_parameters(context.optimize_graph, context.scope,
-                                   params, ratios, context.place)
-
-            self._prune_graph(context.eval_graph, context.optimize_graph)
-            context.optimize_graph.update_groups_of_conv()
-            context.eval_graph.update_groups_of_conv()
-            context.optimize_graph.compile(
-                mem_opt=True)  # to update the compiled program
-
-            context.skip_training = False
--- a/python/paddle/fluid/contrib/slim/prune/prune_strategy.py
+++ b/python/paddle/fluid/contrib/slim/prune/prune_strategy.py
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ..core.strategy import Strategy
-from ..graph import VarWrapper, OpWrapper, GraphWrapper
-from ....framework import Program, program_guard, Parameter
-from ....log_helper import get_logger
-from .... import layers
-import prettytable as pt
-import numpy as np
-from scipy.optimize import leastsq
-import copy
-import re
-import os
-import pickle
-import logging
-import sys
-
-__all__ = ['SensitivePruneStrategy', 'UniformPruneStrategy', 'PruneStrategy']
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-class PruneStrategy(Strategy):
-    """
-    The base class of all pruning strategies.
-    """
-
-    def __init__(self,
-                 pruner=None,
-                 start_epoch=0,
-                 end_epoch=0,
-                 target_ratio=0.5,
-                 metric_name=None,
-                 pruned_params='conv.*_weights'):
-        """
-        Args:
-            pruner(slim.Pruner): The pruner used to prune the parameters.
-            start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
-            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
-            target_ratio(float): The flops ratio to be pruned from current model.
-            metric_name(str): The metric used to evaluate the model.
-                         It should be one of keys in out_nodes of graph wrapper.
-            pruned_params(str): The pattern str to match the parameter names to be pruned.
-        """
-        super(PruneStrategy, self).__init__(start_epoch, end_epoch)
-        self.pruner = pruner
-        self.target_ratio = target_ratio
-        self.metric_name = metric_name
-        self.pruned_params = pruned_params
-        self.pruned_list = []
-
-    def _eval_graph(self, context, sampled_rate=None, cached_id=0):
-        """
-        Evaluate the current mode in context.
-        Args:
-            context(slim.core.Context): The context storing all information used to evaluate the current model.
-            sampled_rate(float): The sampled rate used to sample partial data for evaluation. None means using all data in eval_reader. default: None.
-            cached_id(int): The id of dataset sampled. Evaluations with same cached_id use the same sampled dataset. default: 0.
-        """
-        results, names = context.run_eval_graph(sampled_rate, cached_id)
-        metric = np.mean(results[list(names).index(self.metric_name)])
-        return metric
-
-    def _prune_filters_by_ratio(self,
-                                scope,
-                                params,
-                                ratio,
-                                place,
-                                lazy=False,
-                                only_graph=False,
-                                param_shape_backup=None,
-                                param_backup=None):
-        """
-        Pruning filters by given ratio.
-        Args:
-            scope(fluid.core.Scope): The scope used to pruning filters.
-            params(list<VarWrapper>): A list of filter parameters.
-            ratio(float): The ratio to be pruned.
-            place(fluid.Place): The device place of filter parameters.
-            lazy(bool): True means setting the pruned elements to zero.
-                        False means cutting down the pruned elements.
-            only_graph(bool): True means only modifying the graph.
-                              False means modifying graph and variables in  scope.
-        """
-        if params[0].name() in self.pruned_list[0]:
-            return
-        param_t = scope.find_var(params[0].name()).get_tensor()
-        pruned_idx = self.pruner.cal_pruned_idx(
-            params[0].name(), np.array(param_t), ratio, axis=0)
-        for param in params:
-            assert isinstance(param, VarWrapper)
-            param_t = scope.find_var(param.name()).get_tensor()
-            if param_backup is not None and (param.name() not in param_backup):
-                param_backup[param.name()] = copy.deepcopy(np.array(param_t))
-            pruned_param = self.pruner.prune_tensor(
-                np.array(param_t), pruned_idx, pruned_axis=0, lazy=lazy)
-            if not only_graph:
-                param_t.set(pruned_param, place)
-            ori_shape = param.shape()
-            if param_shape_backup is not None and (
-                    param.name() not in param_shape_backup):
-                param_shape_backup[param.name()] = copy.deepcopy(param.shape())
-            new_shape = list(param.shape())
-            new_shape[0] = pruned_param.shape[0]
-            param.set_shape(new_shape)
-            _logger.debug(
-                '|----------------------------------------+----+------------------------------+------------------------------|'
-            )
-            _logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format(
-                str(param.name()),
-                str(ratio), str(ori_shape), str(param.shape())))
-            self.pruned_list[0].append(param.name())
-        return pruned_idx
-
-    def _prune_parameter_by_idx(self,
-                                scope,
-                                params,
-                                pruned_idx,
-                                pruned_axis,
-                                place,
-                                lazy=False,
-                                only_graph=False,
-                                param_shape_backup=None,
-                                param_backup=None):
-        """
-        Pruning parameters in given axis.
-        Args:
-            scope(fluid.core.Scope): The scope storing paramaters to be pruned.
-            params(VarWrapper): The parameter to be pruned.
-            pruned_idx(list): The index of elements to be pruned.
-            pruned_axis(int): The pruning axis.
-            place(fluid.Place): The device place of filter parameters.
-            lazy(bool): True means setting the pruned elements to zero.
-                        False means cutting down the pruned elements.
-            only_graph(bool): True means only modifying the graph.
-                              False means modifying graph and variables in  scope.
-        """
-        if params[0].name() in self.pruned_list[pruned_axis]:
-            return
-        for param in params:
-            assert isinstance(param, VarWrapper)
-            param_t = scope.find_var(param.name()).get_tensor()
-            if param_backup is not None and (param.name() not in param_backup):
-                param_backup[param.name()] = copy.deepcopy(np.array(param_t))
-            pruned_param = self.pruner.prune_tensor(
-                np.array(param_t), pruned_idx, pruned_axis, lazy=lazy)
-            if not only_graph:
-                param_t.set(pruned_param, place)
-            ori_shape = param.shape()
-
-            if param_shape_backup is not None and (
-                    param.name() not in param_shape_backup):
-                param_shape_backup[param.name()] = copy.deepcopy(param.shape())
-            new_shape = list(param.shape())
-            new_shape[pruned_axis] = pruned_param.shape[pruned_axis]
-            param.set_shape(new_shape)
-            _logger.debug(
-                '|----------------------------------------+----+------------------------------+------------------------------|'
-            )
-            _logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format(
-                str(param.name()),
-                str(pruned_axis), str(ori_shape), str(param.shape())))
-            self.pruned_list[pruned_axis].append(param.name())
-
-    def _forward_search_related_op(self, graph, param):
-        """
-        Forward search operators that will be affected by pruning of param.
-        Args:
-            graph(GraphWrapper): The graph to be searched.
-            param(VarWrapper): The current pruned parameter.
-        Returns:
-            list<OpWrapper>: A list of operators.
-        """
-        assert isinstance(param, VarWrapper)
-        visited = {}
-        for op in graph.ops():
-            visited[op.idx()] = False
-        stack = []
-        for op in graph.ops():
-            if (not op.is_bwd_op()) and (param in op.all_inputs()):
-                stack.append(op)
-        visit_path = []
-        while len(stack) > 0:
-            top_op = stack[len(stack) - 1]
-            if visited[top_op.idx()] == False:
-                visit_path.append(top_op)
-                visited[top_op.idx()] = True
-            next_ops = None
-            if top_op.type() == "conv2d" and param not in top_op.all_inputs():
-                next_ops = None
-            elif top_op.type() == "mul":
-                next_ops = None
-            else:
-                next_ops = self._get_next_unvisited_op(graph, visited, top_op)
-            if next_ops == None:
-                stack.pop()
-            else:
-                stack += next_ops
-        return visit_path
-
-    def _get_next_unvisited_op(self, graph, visited, top_op):
-        """
-        Get next unvisited adjacent operators of given operators.
-        Args:
-            graph(GraphWrapper): The graph used to search. 
-            visited(list): The ids of operators that has been visited.
-            top_op: The given operator.
-        Returns:
-            list<OpWrapper>: A list of operators. 
-        """
-        assert isinstance(top_op, OpWrapper)
-        next_ops = []
-        for op in graph.next_ops(top_op):
-            if (visited[op.idx()] == False) and (not op.is_bwd_op()):
-                next_ops.append(op)
-        return next_ops if len(next_ops) > 0 else None
-
-    def _get_accumulator(self, graph, param):
-        """
-        Get accumulators of given parameter. The accumulator was created by optimizer.
-        Args:
-            graph(GraphWrapper): The graph used to search.
-            param(VarWrapper): The given parameter.
-        Returns:
-            list<VarWrapper>: A list of accumulators which are variables.
-        """
-        assert isinstance(param, VarWrapper)
-        params = []
-        for op in param.outputs():
-            if op.is_opt_op():
-                for out_var in op.all_outputs():
-                    if graph.is_persistable(out_var) and out_var.name(
-                    ) != param.name():
-                        params.append(out_var)
-        return params
-
-    def _forward_pruning_ralated_params(self,
-                                        graph,
-                                        scope,
-                                        param,
-                                        place,
-                                        ratio=None,
-                                        pruned_idxs=None,
-                                        lazy=False,
-                                        only_graph=False,
-                                        param_backup=None,
-                                        param_shape_backup=None):
-        """
-        Pruning all the parameters affected by the pruning of given parameter.
-        Args:
-            graph(GraphWrapper): The graph to be searched.
-            scope(fluid.core.Scope): The scope storing paramaters to be pruned.
-            param(VarWrapper): The given parameter.
-            place(fluid.Place): The device place of filter parameters.
-            ratio(float): The target ratio to be pruned.
-            pruned_idx(list): The index of elements to be pruned.
-            lazy(bool): True means setting the pruned elements to zero.
-                        False means cutting down the pruned elements.
-            only_graph(bool): True means only modifying the graph.
-                              False means modifying graph and variables in  scope.
-        """
-        assert isinstance(
-            graph,
-            GraphWrapper), "graph must be instance of slim.core.GraphWrapper"
-        assert isinstance(
-            param, VarWrapper), "param must be instance of slim.core.VarWrapper"
-
-        if param.name() in self.pruned_list[0]:
-            return
-        related_ops = self._forward_search_related_op(graph, param)
-
-        if ratio is None:
-            assert pruned_idxs is not None
-            self._prune_parameter_by_idx(
-                scope, [param] + self._get_accumulator(graph, param),
-                pruned_idxs,
-                pruned_axis=0,
-                place=place,
-                lazy=lazy,
-                only_graph=only_graph,
-                param_backup=param_backup,
-                param_shape_backup=param_shape_backup)
-
-        else:
-            pruned_idxs = self._prune_filters_by_ratio(
-                scope, [param] + self._get_accumulator(graph, param),
-                ratio,
-                place,
-                lazy=lazy,
-                only_graph=only_graph,
-                param_backup=param_backup,
-                param_shape_backup=param_shape_backup)
-        corrected_idxs = pruned_idxs[:]
-
-        for idx, op in enumerate(related_ops):
-            if op.type() == "conv2d" and (param not in op.all_inputs()):
-                for in_var in op.all_inputs():
-                    if graph.is_parameter(in_var):
-                        conv_param = in_var
-                        self._prune_parameter_by_idx(
-                            scope, [conv_param] + self._get_accumulator(
-                                graph, conv_param),
-                            corrected_idxs,
-                            pruned_axis=1,
-                            place=place,
-                            lazy=lazy,
-                            only_graph=only_graph,
-                            param_backup=param_backup,
-                            param_shape_backup=param_shape_backup)
-            if op.type() == "depthwise_conv2d":
-                for in_var in op.all_inputs():
-                    if graph.is_parameter(in_var):
-                        conv_param = in_var
-                        self._prune_parameter_by_idx(
-                            scope, [conv_param] + self._get_accumulator(
-                                graph, conv_param),
-                            corrected_idxs,
-                            pruned_axis=0,
-                            place=place,
-                            lazy=lazy,
-                            only_graph=only_graph,
-                            param_backup=param_backup,
-                            param_shape_backup=param_shape_backup)
-            elif op.type() == "elementwise_add":
-                # pruning bias
-                for in_var in op.all_inputs():
-                    if graph.is_parameter(in_var):
-                        bias_param = in_var
-                        self._prune_parameter_by_idx(
-                            scope, [bias_param] + self._get_accumulator(
-                                graph, bias_param),
-                            pruned_idxs,
-                            pruned_axis=0,
-                            place=place,
-                            lazy=lazy,
-                            only_graph=only_graph,
-                            param_backup=param_backup,
-                            param_shape_backup=param_shape_backup)
-            elif op.type() == "mul":  # pruning fc layer
-                fc_input = None
-                fc_param = None
-                for in_var in op.all_inputs():
-                    if graph.is_parameter(in_var):
-                        fc_param = in_var
-                    else:
-                        fc_input = in_var
-
-                idx = []
-                feature_map_size = fc_input.shape()[2] * fc_input.shape()[3]
-                range_idx = np.array(range(feature_map_size))
-                for i in corrected_idxs:
-                    idx += list(range_idx + i * feature_map_size)
-                corrected_idxs = idx
-                self._prune_parameter_by_idx(
-                    scope, [fc_param] + self._get_accumulator(graph, fc_param),
-                    corrected_idxs,
-                    pruned_axis=0,
-                    place=place,
-                    lazy=lazy,
-                    only_graph=only_graph,
-                    param_backup=param_backup,
-                    param_shape_backup=param_shape_backup)
-
-            elif op.type() == "concat":
-                concat_inputs = op.all_inputs()
-                last_op = related_ops[idx - 1]
-                for out_var in last_op.all_outputs():
-                    if out_var in concat_inputs:
-                        concat_idx = concat_inputs.index(out_var)
-                offset = 0
-                for ci in range(concat_idx):
-                    offset += concat_inputs[ci].shape()[1]
-                corrected_idxs = [x + offset for x in pruned_idxs]
-            elif op.type() == "batch_norm":
-                bn_inputs = op.all_inputs()
-                mean = bn_inputs[2]
-                variance = bn_inputs[3]
-                alpha = bn_inputs[0]
-                beta = bn_inputs[1]
-                self._prune_parameter_by_idx(
-                    scope, [mean] + self._get_accumulator(graph, mean),
-                    corrected_idxs,
-                    pruned_axis=0,
-                    place=place,
-                    lazy=lazy,
-                    only_graph=only_graph,
-                    param_backup=param_backup,
-                    param_shape_backup=param_shape_backup)
-                self._prune_parameter_by_idx(
-                    scope, [variance] + self._get_accumulator(graph, variance),
-                    corrected_idxs,
-                    pruned_axis=0,
-                    place=place,
-                    lazy=lazy,
-                    only_graph=only_graph,
-                    param_backup=param_backup,
-                    param_shape_backup=param_shape_backup)
-                self._prune_parameter_by_idx(
-                    scope, [alpha] + self._get_accumulator(graph, alpha),
-                    corrected_idxs,
-                    pruned_axis=0,
-                    place=place,
-                    lazy=lazy,
-                    only_graph=only_graph,
-                    param_backup=param_backup,
-                    param_shape_backup=param_shape_backup)
-                self._prune_parameter_by_idx(
-                    scope, [beta] + self._get_accumulator(graph, beta),
-                    corrected_idxs,
-                    pruned_axis=0,
-                    place=place,
-                    lazy=lazy,
-                    only_graph=only_graph,
-                    param_backup=param_backup,
-                    param_shape_backup=param_shape_backup)
-
-    def _prune_parameters(self,
-                          graph,
-                          scope,
-                          params,
-                          ratios,
-                          place,
-                          lazy=False,
-                          only_graph=False,
-                          param_backup=None,
-                          param_shape_backup=None):
-        """
-        Pruning the given parameters.
-
-        Every parameter is pruned together with its related parameters by
-        calling ``_forward_pruning_ralated_params``. For each conv2d operator
-        that consumes the parameter, the parameters of its brother operators
-        (see ``_search_brother_ops``) are pruned with the same ratio.
-        ``self.pruned_list`` is reset to ``[[], []]`` before pruning starts.
-
-        Args:
-            graph(GraphWrapper): The graph to be searched.
-            scope(fluid.core.Scope): The scope storing parameters to be pruned.
-            params(list<str>): A list of parameter names to be pruned.
-            ratios(list<float>): A list of ratios to be used to pruning parameters.
-                                 It must have the same length as params.
-            place(fluid.Place): The device place of filter parameters.
-            lazy(bool): True means setting the pruned elements to zero.
-                        False means cutting down the pruned elements.
-            only_graph(bool): True means only modifying the graph.
-                              False means modifying graph and variables in scope.
-            param_backup(dict|None): Forwarded unchanged to
-                                     _forward_pruning_ralated_params. default: None.
-            param_shape_backup(dict|None): Forwarded unchanged to
-                                     _forward_pruning_ralated_params. default: None.
-        """
-        # `unicode` only exists on Python 2. Referencing it directly inside
-        # the assertion raised NameError on Python 3 for any non-str name.
-        try:
-            string_types = (str, unicode)
-        except NameError:
-            string_types = (str, )
-
-        _logger.debug('\n################################')
-        _logger.debug('#       pruning parameters       #')
-        _logger.debug('################################\n')
-        _logger.debug(
-            '|----------------------------------------+----+------------------------------+------------------------------|'
-        )
-        _logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format('parameter', 'axis',
-                                                            'from', 'to'))
-        assert len(params) == len(ratios)
-        self.pruned_list = [[], []]
-        for param, ratio in zip(params, ratios):
-            assert isinstance(param, string_types)
-            param = graph.var(param)
-            self._forward_pruning_ralated_params(
-                graph,
-                scope,
-                param,
-                place,
-                ratio=ratio,
-                lazy=lazy,
-                only_graph=only_graph,
-                param_backup=param_backup,
-                param_shape_backup=param_shape_backup)
-            # Operators sharing an output path with a pruned conv2d must be
-            # pruned with the same ratio.
-            for op in param.outputs():
-                if op.type() != 'conv2d':
-                    continue
-                for brother in self._search_brother_ops(graph, op):
-                    for p in graph.get_param_by_op(brother):
-                        self._forward_pruning_ralated_params(
-                            graph,
-                            scope,
-                            p,
-                            place,
-                            ratio=ratio,
-                            lazy=lazy,
-                            only_graph=only_graph,
-                            param_backup=param_backup,
-                            param_shape_backup=param_shape_backup)
-        _logger.debug(
-            '|----------------------------------------+----+------------------------------+------------------------------|'
-        )
-
-    def _search_brother_ops(self, graph, op_node):
-        """
-        Search brother operators that are affected by pruning of given operator.
-
-        Starting from the non-conv2d/non-fc forward successors of op_node, the
-        graph is walked in both directions without crossing conv2d or fc
-        operators. Every unvisited conv2d or fc operator found as a
-        predecessor during the walk is reported as a brother.
-
-        Args:
-            graph(GraphWrapper): The graph to be searched.
-            op_node(OpWrapper): The start node for searching.
-        Returns:
-            list<OpWrapper>: A list of operators.
-        """
-
-        def _is_conv_or_fc(op):
-            # `type` is a method. The previous code compared the bound method
-            # itself with a string in two places, which is never equal, so
-            # no brother was ever found.
-            return op.type() in ('conv2d', 'fc')
-
-        visited = [op_node.idx()]
-        stack = []
-        brothers = []
-        for op in graph.next_ops(op_node):
-            if (not _is_conv_or_fc(op)) and (not op._is_bwd_op()):
-                stack.append(op)
-                visited.append(op.idx())
-        while len(stack) > 0:
-            top_op = stack.pop()
-            for parent in graph.pre_ops(top_op):
-                if parent.idx() in visited or parent._is_bwd_op():
-                    continue
-                if _is_conv_or_fc(parent):
-                    brothers.append(parent)
-                else:
-                    stack.append(parent)
-                visited.append(parent.idx())
-
-            for child in graph.next_ops(top_op):
-                if (not _is_conv_or_fc(child)) and (
-                        child.idx() not in visited) and (
-                            not child._is_bwd_op()):
-                    stack.append(child)
-                    visited.append(child.idx())
-        return brothers
-
-    def _prune_graph(self, graph, target_graph):
-        """
-        Copy the shape of every parameter of target_graph onto the variable
-        with the same name in graph, logging each change at debug level.
-        Args:
-            graph(GraphWrapper): The graph to be pruned.
-            target_graph(GraphWrapper): The reference graph.
-        Return: None
-        """
-        separator = '|----+----------------------------------------+------------------------------+------------------------------|'
-        row_format = '|{:^4}|{:^40}|{:^30}|{:^30}|'
-
-        _logger.debug(separator)
-        _logger.debug(row_format.format('id', 'parammeter', 'from', 'to'))
-        # Rows are numbered from 1.
-        for row_id, target_param in enumerate(target_graph.all_parameters(),
-                                              1):
-            var = graph.var(target_param.name())
-            # Remember the shape before it is overwritten, for the log row.
-            ori_shape = var.shape()
-            var.set_shape(target_param.shape())
-            _logger.debug(separator)
-            _logger.debug(
-                row_format.format(
-                    str(row_id),
-                    str(target_param.name()),
-                    str(ori_shape), str(target_param.shape())))
-        _logger.debug(separator)
-
-
-class UniformPruneStrategy(PruneStrategy):
-    """
-    The uniform pruning strategy. The parameters will be pruned by uniform ratio.
-    """
-
-    def __init__(self,
-                 pruner=None,
-                 start_epoch=0,
-                 end_epoch=0,
-                 target_ratio=0.5,
-                 metric_name=None,
-                 pruned_params='conv.*_weights'):
-        """
-        Args:
-            pruner(slim.Pruner): The pruner used to prune the parameters.
-            start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
-            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
-            target_ratio(float): The flops ratio to be pruned from current model.
-            metric_name(str): The metric used to evaluate the model.
-                         It should be one of keys in out_nodes of graph wrapper.
-            pruned_params(str): The pattern str to match the parameter names to be pruned.
-        """
-        super(UniformPruneStrategy, self).__init__(pruner, start_epoch,
-                                                   end_epoch, target_ratio,
-                                                   metric_name, pruned_params)
-
-    def _get_best_ratios(self, context):
-        """
-        Search a group of ratios for pruning target flops.
-
-        One ratio shared by all parameters matching self.pruned_params is
-        found by binary search over [0, 1]. Each candidate is applied to the
-        eval graph only (only_graph=True), the pruned flops are measured and
-        the original shapes are restored afterwards.
-
-        Returns:
-            tuple: The matched parameter names and one ratio per name.
-        """
-        _logger.info('_get_best_ratios')
-        pruned_params = [
-            param.name() for param in context.eval_graph.all_parameters()
-            if re.match(self.pruned_params, param.name())
-        ]
-
-        min_ratio = 0.
-        max_ratio = 1.
-
-        flops = context.eval_graph.flops()
-        model_size = context.eval_graph.numel_params()
-
-        while min_ratio < max_ratio:
-            ratio = (max_ratio + min_ratio) / 2
-            _logger.debug(
-                '-----------Try pruning ratio: {:.2f}-----------'.format(ratio))
-            ratios = [ratio] * len(pruned_params)
-            param_shape_backup = {}
-            self._prune_parameters(
-                context.eval_graph,
-                context.scope,
-                pruned_params,
-                ratios,
-                context.place,
-                only_graph=True,
-                param_shape_backup=param_shape_backup)
-
-            pruned_flops = 1 - (float(context.eval_graph.flops()) / flops)
-            pruned_size = 1 - (float(context.eval_graph.numel_params()) /
-                               model_size)
-            _logger.debug('Pruned flops: {:.2f}'.format(pruned_flops))
-            _logger.debug('Pruned model size: {:.2f}'.format(pruned_size))
-            # Undo the trial pruning before the next candidate is tried.
-            for param in param_shape_backup.keys():
-                context.eval_graph.var(param).set_shape(param_shape_backup[
-                    param])
-
-            if abs(pruned_flops - self.target_ratio) < 1e-2:
-                break
-            # Once the midpoint coincides with a bound the float interval
-            # cannot be bisected any further. Without this guard the loop
-            # could spin forever when the tolerance is never reached.
-            if ratio <= min_ratio or ratio >= max_ratio:
-                break
-            if pruned_flops > self.target_ratio:
-                max_ratio = ratio
-            else:
-                min_ratio = ratio
-        _logger.info('Get ratios: {}'.format([round(r, 2) for r in ratios]))
-        return pruned_params, ratios
-
-    def restore_from_checkpoint(self, context):
-        """
-        Prune again with the parameters and ratios stored by on_epoch_begin.
-        """
-        self._prune(context, self.params, self.ratios)
-
-    def _prune(self, context, params, ratios):
-        """
-        Prune the optimize graph by the given ratios, then copy the resulting
-        parameter shapes to the eval graph and log the pruned size and flops.
-        """
-        self._prune_parameters(context.optimize_graph, context.scope, params,
-                               ratios, context.place)
-
-        # Taken before the eval graph is reshaped, as the reference for the
-        # pruned percentages logged below.
-        model_size = context.eval_graph.numel_params()
-        flops = context.eval_graph.flops()
-        _logger.debug('\n################################')
-        _logger.debug('#          pruning eval graph    #')
-        _logger.debug('################################\n')
-        self._prune_graph(context.eval_graph, context.optimize_graph)
-        context.optimize_graph.update_groups_of_conv()
-        context.eval_graph.update_groups_of_conv()
-
-        _logger.info(
-            '------------------finish pruning--------------------------------')
-        _logger.info('Pruned size: {:.2f}'.format(1 - (float(
-            context.eval_graph.numel_params()) / model_size)))
-        _logger.info('Pruned flops: {:.2f}'.format(1 - (float(
-            context.eval_graph.flops()) / flops)))
-
-    def on_epoch_begin(self, context):
-        """
-        Search the ratios and prune once, when the current epoch is start_epoch.
-        """
-        if context.epoch_id == self.start_epoch:
-            params, ratios = self._get_best_ratios(context)
-            # Kept for restore_from_checkpoint.
-            self.params = params
-            self.ratios = ratios
-            self._prune(context, params, ratios)
-            _logger.info(
-                '------------------UniformPruneStrategy.on_compression_begin finish--------------------------------'
-            )
-
-
-class SensitivePruneStrategy(PruneStrategy):
-    """
-    Sensitive pruning strategy. Different pruned ratio was applied on each layer.
-    """
-
-    def __init__(self,
-                 pruner=None,
-                 start_epoch=0,
-                 end_epoch=0,
-                 delta_rate=0.20,
-                 target_ratio=0.5,
-                 metric_name='top1_acc',
-                 pruned_params='conv.*_weights',
-                 sensitivities_file='./sensitivities.data',
-                 sensitivities=None,
-                 num_steps=1,
-                 eval_rate=None):
-        """
-        Args:
-            pruner(slim.Pruner): The pruner used to prune the parameters.
-            start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0.
-            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0.
-            delta_rate(float): The delta used to generate ratios when calculating sensitivities. default: 0.2
-            target_ratio(float): The flops ratio to be pruned from current model. default: 0.5
-            metric_name(str): The metric used to evaluate the model.
-                         It should be one of keys in out_nodes of graph wrapper. default: 'top1_acc'
-            pruned_params(str): The pattern str to match the parameter names to be pruned. default: 'conv.*_weights'.
-            sensitivities_file(str): The sensitivities file. default: './sensitivities.data'
-            sensitivities(dict): The user-defined sensitivities.
-                                 None means an empty dict. default: None.
-            num_steps(int): The number of pruning steps. default: 1.
-            eval_rate(float): The rate of sampled data used to calculate sensitivities.
-                              None means using all the data. default: None.
-        """
-        super(SensitivePruneStrategy, self).__init__(pruner, start_epoch,
-                                                     end_epoch, target_ratio,
-                                                     metric_name, pruned_params)
-        self.delta_rate = delta_rate
-        self.pruned_list = []
-        # A fresh dict per instance. A `{}` default in the signature would be
-        # shared by every instance created without this argument.
-        self.sensitivities = {} if sensitivities is None else sensitivities
-        self.sensitivities_file = sensitivities_file
-        self.num_steps = num_steps
-        self.eval_rate = eval_rate
-        # Ratio to prune in each step so that num_steps steps compound to
-        # target_ratio.
-        self.pruning_step = 1 - pow((1 - target_ratio), 1.0 / self.num_steps)
-
-    def _save_sensitivities(self, sensitivities, sensitivities_file):
-        """
-        Save sensitivities into file.
-        """
-        with open(sensitivities_file, 'wb') as f:
-            pickle.dump(sensitivities, f)
-
-    def _load_sensitivities(self, sensitivities_file):
-        """
-        Load sensitivities from file.
-
-        Returns an empty dict when the file name is empty or the file does
-        not exist. The loaded 'pruned_percent' values are rounded to two
-        digits.
-        """
-        sensitivities = {}
-        if sensitivities_file and os.path.exists(sensitivities_file):
-            # NOTE(review): unpickling executes code embedded in the file.
-            # Only load sensitivities files from a trusted source.
-            with open(sensitivities_file, 'rb') as f:
-                if sys.version_info < (3, 0):
-                    sensitivities = pickle.load(f)
-                else:
-                    sensitivities = pickle.load(f, encoding='bytes')
-
-        for param in sensitivities:
-            sensitivities[param]['pruned_percent'] = [
-                round(p, 2) for p in sensitivities[param]['pruned_percent']
-            ]
-        self._format_sensitivities(sensitivities)
-        return sensitivities
-
-    def _format_sensitivities(self, sensitivities):
-        """
-        Print formatted sensitivities in debug log level.
-
-        Only parameters having one loss for every ratio column are listed.
-        """
-        tb = pt.PrettyTable()
-        tb.field_names = ["parameter", "size"] + [
-            str(round(i, 2))
-            for i in np.arange(self.delta_rate, 1, self.delta_rate)
-        ]
-        for param in sensitivities:
-            if len(sensitivities[param]['loss']) == (len(tb.field_names) - 2):
-                tb.add_row([param, sensitivities[param]['size']] + [
-                    round(loss, 2) for loss in sensitivities[param]['loss']
-                ])
-        _logger.debug('\n################################')
-        _logger.debug('#      sensitivities table     #')
-        _logger.debug('################################\n')
-        _logger.debug(tb)
-
-    def _compute_sensitivities(self, context):
-        """
-        Computing the sensitivities of all parameters.
-
-        For every parameter and every ratio in delta_rate, 2 * delta_rate, ...
-        (below 1) that has not been computed yet, the parameter is pruned
-        lazily, the metric loss against the unpruned model is recorded, the
-        result is saved to the sensitivities file and the pruned values are
-        restored.
-
-        Returns:
-            dict: The sensitivities keyed by parameter name.
-        """
-        _logger.info("calling _compute_sensitivities.")
-        cached_id = np.random.randint(1000)
-        if self.start_epoch == context.epoch_id:
-            sensitivities_file = self.sensitivities_file
-        else:
-            sensitivities_file = self.sensitivities_file + ".epoch" + str(
-                context.epoch_id)
-        sensitivities = self._load_sensitivities(sensitivities_file)
-
-        for param in context.eval_graph.all_parameters():
-            if not re.match(self.pruned_params, param.name()):
-                continue
-            if param.name() not in sensitivities:
-                sensitivities[param.name()] = {
-                    'pruned_percent': [],
-                    'loss': [],
-                    'size': param.shape()[0]
-                }
-
-        # The baseline metric is evaluated lazily, only if at least one
-        # ratio still has to be computed.
-        metric = None
-        # Iterate over a snapshot of the keys: new entries may be added for
-        # brother parameters below, and changing the size of a dict while
-        # iterating its live key view raises RuntimeError on Python 3.
-        for param in list(sensitivities.keys()):
-            ratio = self.delta_rate
-            while ratio < 1:
-                ratio = round(ratio, 2)
-                if ratio in sensitivities[param]['pruned_percent']:
-                    _logger.debug('{}, {} has computed.'.format(param, ratio))
-                    ratio += self.delta_rate
-                    continue
-                if metric is None:
-                    metric = self._eval_graph(context, self.eval_rate,
-                                              cached_id)
-
-                param_backup = {}
-                # prune parameter by ratio
-                self._prune_parameters(
-                    context.eval_graph,
-                    context.scope, [param], [ratio],
-                    context.place,
-                    lazy=True,
-                    param_backup=param_backup)
-                # get accuracy after pruning and update self.sensitivities
-                pruned_metric = self._eval_graph(context, self.eval_rate,
-                                                 cached_id)
-                loss = metric - pruned_metric
-                _logger.info("pruned param: {}; {}; loss={}".format(
-                    param, ratio, loss))
-                for brother in self.pruned_list[0]:
-                    if re.match(self.pruned_params, brother):
-                        if brother not in sensitivities:
-                            sensitivities[brother] = {
-                                'pruned_percent': [],
-                                'loss': []
-                            }
-                        sensitivities[brother]['pruned_percent'].append(ratio)
-                        sensitivities[brother]['loss'].append(loss)
-
-                self._save_sensitivities(sensitivities, sensitivities_file)
-
-                # restore pruned parameters
-                for param_name in param_backup.keys():
-                    param_t = context.scope.find_var(param_name).get_tensor()
-                    param_t.set(param_backup[param_name], context.place)
-
-                ratio += self.delta_rate
-        return sensitivities
-
-    def _get_best_ratios(self, context, sensitivities, target_ratio):
-        """
-        Search a group of ratios for pruning target flops.
-
-        A cubic curve loss = f(ratio) is fitted for every parameter. A binary
-        search over the accuracy loss then looks for the loss whose per
-        parameter ratios prune about target_ratio of the flops.
-
-        Returns:
-            tuple: The parameter names and one ratio per name.
-        """
-        _logger.info('_get_best_ratios for pruning ratie: {}'.format(
-            target_ratio))
-
-        def func(params, x):
-            a, b, c, d = params
-            return a * x * x * x + b * x * x + c * x + d
-
-        def error(params, x, y):
-            return func(params, x) - y
-
-        def slove_coefficient(x, y):
-            init_coefficient = [10, 10, 10, 10]
-            coefficient, _ = leastsq(error, init_coefficient, args=(x, y))
-            return coefficient
-
-        min_loss = 0.
-        max_loss = 0.
-
-        # step 1: fit curve by sensitivities
-        coefficients = {}
-        for param in sensitivities:
-            # Five extra (0, 0) samples are prepended to the fitted points.
-            losses = np.array([0] * 5 + sensitivities[param]['loss'])
-            precents = np.array([0] * 5 + sensitivities[param][
-                'pruned_percent'])
-            coefficients[param] = slove_coefficient(precents, losses)
-            loss = np.max(losses)
-            max_loss = np.max([max_loss, loss])
-
-        # step 2: Find a group of ratios by binary searching.
-        flops = context.eval_graph.flops()
-        model_size = context.eval_graph.numel_params()
-        param_names = list(sensitivities.keys())
-        ratios = []
-        while min_loss < max_loss:
-            loss = (max_loss + min_loss) / 2
-            _logger.info(
-                '-----------Try pruned ratios while acc loss={:.4f}-----------'.
-                format(loss))
-            ratios = []
-            # step 2.1: Get ratios according to current loss
-            for param in param_names:
-                coefficient = copy.deepcopy(coefficients[param])
-                coefficient[-1] = coefficient[-1] - loss
-                valid_roots = [
-                    root.real for root in np.roots(coefficient)
-                    if np.isreal(root) and 0 < root.real < 1
-                ]
-                # Take the smallest real root inside (0, 1). The previous
-                # code reused the root of the preceding parameter, or raised
-                # NameError, when no such root existed.
-                # NOTE(review): falling back to 0 (prune nothing) is the
-                # conservative choice made here - confirm it is the desired
-                # behaviour.
-                ratios.append(min(valid_roots) if valid_roots else 0.)
-            _logger.info('Pruned ratios={}'.format(
-                [round(ratio, 3) for ratio in ratios]))
-            # step 2.2: Pruning by current ratios
-            param_shape_backup = {}
-            self._prune_parameters(
-                context.eval_graph,
-                context.scope,
-                param_names,
-                ratios,
-                context.place,
-                only_graph=True,
-                param_shape_backup=param_shape_backup)
-
-            pruned_flops = 1 - (float(context.eval_graph.flops()) / flops)
-            pruned_size = 1 - (float(context.eval_graph.numel_params()) /
-                               model_size)
-            _logger.info('Pruned flops: {:.4f}'.format(pruned_flops))
-            _logger.info('Pruned model size: {:.4f}'.format(pruned_size))
-            # Undo the trial pruning before the next candidate is tried.
-            for param in param_shape_backup.keys():
-                context.eval_graph.var(param).set_shape(param_shape_backup[
-                    param])
-
-            # step 2.3: Check whether current ratios is enough
-            if abs(pruned_flops - target_ratio) < 0.015:
-                break
-            # Once the midpoint coincides with a bound the float interval
-            # cannot be bisected any further. Without this guard the loop
-            # could spin forever when the tolerance is never reached.
-            if loss <= min_loss or loss >= max_loss:
-                break
-            if pruned_flops > target_ratio:
-                max_loss = loss
-            else:
-                min_loss = loss
-        return param_names, ratios
-
-    def _current_pruning_target(self, context):
-        '''
-        Get the target pruning rate in current epoch.
-
-        Returns None when no pruning step is left, or when the current epoch
-        is neither start_epoch nor an epoch in which the metric converged.
-        '''
-        _logger.info('Left number of pruning steps: {}'.format(self.num_steps))
-        if self.num_steps <= 0:
-            return None
-        if (self.start_epoch == context.epoch_id) or context.eval_converged(
-                self.metric_name, 0.005):
-            self.num_steps -= 1
-            return self.pruning_step
-        return None
-
-    def on_epoch_begin(self, context):
-        """
-        Run one pruning step if a pruning target is due in the current epoch.
-        """
-        current_ratio = self._current_pruning_target(context)
-        if current_ratio is not None:
-            sensitivities = self._compute_sensitivities(context)
-            params, ratios = self._get_best_ratios(context, sensitivities,
-                                                   current_ratio)
-            self._prune_parameters(context.optimize_graph, context.scope,
-                                   params, ratios, context.place)
-
-            # Taken before the eval graph is reshaped, as the reference for
-            # the pruned percentages logged below.
-            model_size = context.eval_graph.numel_params()
-            flops = context.eval_graph.flops()
-            _logger.debug('################################')
-            _logger.debug('#          pruning eval graph    #')
-            _logger.debug('################################')
-            self._prune_graph(context.eval_graph, context.optimize_graph)
-            context.optimize_graph.update_groups_of_conv()
-            context.eval_graph.update_groups_of_conv()
-            context.optimize_graph.compile()  # to update the compiled program
-            context.eval_graph.compile(
-                for_parallel=False,
-                for_test=True)  # to update the compiled program
-            _logger.info(
-                '------------------finish pruning--------------------------------'
-            )
-            _logger.info('Pruned size: {:.3f}'.format(1 - (float(
-                context.eval_graph.numel_params()) / model_size)))
-            _logger.info('Pruned flops: {:.3f}'.format(1 - (float(
-                context.eval_graph.flops()) / flops)))
-            metric = self._eval_graph(context)
-            _logger.info('Metric after pruning: {:.2f}'.format(metric))
-            _logger.info(
-                '------------------SensitivePruneStrategy.on_epoch_begin finish--------------------------------'
-            )
--- a/python/paddle/fluid/contrib/slim/prune/pruner.py
+++ b/python/paddle/fluid/contrib/slim/prune/pruner.py
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import collections
-from .... import layers
-
-__all__ = ['Pruner', 'StructurePruner']
-
-
-class Pruner(object):
-    """
-    Base class of all pruners.
-    """
-
-    def __init__(self):
-        pass
-
-    def prune(self, param):
-        """
-        Placeholder that does nothing and returns None.
-        """
-        pass
-
-
-class StructurePruner(Pruner):
-    """
-    Pruner used to pruning parameters by groups.
-    """
-
-    def __init__(self, pruning_axis, criterions):
-        """
-        Args:
-            pruning_axis(dict): The key is the name of parameter to be pruned,
-                                '*' means all the parameters.
-                                The value is the axis to be used. Given a parameter
-                                with shape [3, 4], the result of pruning 50% on axis 1
-                                is a parameter with shape [3, 2].
-            criterions(dict): The key is the name of parameter to be pruned,
-                              '*' means all the parameters.
-                              The value is the criterion used to sort groups for pruning.
-                              It only supports 'l1_norm' currently.
-        """
-        self.pruning_axis = pruning_axis
-        self.criterions = criterions
-
-    def cal_pruned_idx(self, name, param, ratio, axis=None):
-        """
-        Calculate the index to be pruned on axis by given pruning ratio.
-
-        The groups along axis are ranked by the criterion and the
-        round(size * ratio) groups with the lowest score are selected.
-
-        Args:
-            name(str): The name of parameter to be pruned.
-            param(np.array): The data of parameter to be pruned.
-            ratio(float): The ratio to be pruned.
-            axis(int): The axis to be used for pruning given parameter.
-                       If it is None, the value in self.pruning_axis will be used.
-                       default: None.
-        Returns:
-            np.array: The indexes to be pruned on axis.
-        Raises:
-            ValueError: If the criterion of the parameter is not 'l1_norm'.
-        """
-        # The '*' entry is looked up only when there is no entry for `name`.
-        criterion = self.criterions[
-            name] if name in self.criterions else self.criterions['*']
-        if axis is None:
-            assert self.pruning_axis is not None, "pruning_axis should set if axis is None."
-            axis = self.pruning_axis[
-                name] if name in self.pruning_axis else self.pruning_axis['*']
-        prune_num = int(round(param.shape[axis] * ratio))
-        reduce_dims = [i for i in range(len(param.shape)) if i != axis]
-        if criterion == 'l1_norm':
-            scores = np.sum(np.abs(param), axis=tuple(reduce_dims))
-        else:
-            # Previously an unknown criterion ended in an unbound local
-            # variable error. Report the real problem instead.
-            raise ValueError(
-                "Unsupported criterion '{}' for parameter '{}'; only "
-                "'l1_norm' is supported.".format(criterion, name))
-        pruned_idx = scores.argsort()[:prune_num]
-        return pruned_idx
-
-    def prune_tensor(self, tensor, pruned_idx, pruned_axis, lazy=False):
-        """
-        Pruning a array by indexes on given axis.
-        Args:
-            tensor(numpy.array): The target array to be pruned.
-            pruned_idx(list<int>): The indexes to be pruned.
-            pruned_axis(int): The axis of given array to be pruned on.
-            lazy(bool): True means setting the pruned elements to zero.
-                        False means remove the pruned elements from memory.
-                        default: False.
-        Returns:
-            numpy.array: The pruned array.
-        """
-        # True marks the positions along pruned_axis that are pruned.
-        mask = np.zeros(tensor.shape[pruned_axis], dtype=bool)
-        mask[pruned_idx] = True
-
-        def func(data):
-            return data[~mask]
-
-        def lazy_func(data):
-            # NOTE(review): this writes into the 1-D slice it receives.
-            # Depending on how np.apply_along_axis hands out slices this may
-            # also zero the input tensor in place - confirm before relying
-            # on the input staying untouched.
-            data[mask] = 0
-            return data
-
-        if lazy:
-            return np.apply_along_axis(lazy_func, pruned_axis, tensor)
-        else:
-            return np.apply_along_axis(func, pruned_axis, tensor)
--- a/python/paddle/fluid/contrib/slim/quantization/__init__.py
+++ b/python/paddle/fluid/contrib/slim/quantization/__init__.py
@@ -16,10 +16,6 @@ from __future__ import print_function

 from . import quantization_pass
 from .quantization_pass import *
-from . import quantization_strategy
-from .quantization_strategy import *
-from . import mkldnn_post_training_strategy
-from .mkldnn_post_training_strategy import *
 from . import quant_int8_mkldnn_pass
 from .quant_int8_mkldnn_pass import *
 from . import quant2_int8_mkldnn_pass
@@ -29,8 +25,7 @@ from .post_training_quantization import *
 from . import imperative
 from .imperative import *

-__all__ = quantization_pass.__all__ + quantization_strategy.__all__
-__all__ += mkldnn_post_training_strategy.__all__
+__all__ = quantization_pass.__all__
 __all__ += quant_int8_mkldnn_pass.__all__
 __all__ += quant2_int8_mkldnn_pass.__all__
 __all__ += post_training_quantization.__all__

--- a/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py
+++ b/python/paddle/fluid/contrib/slim/quantization/mkldnn_post_training_strategy.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import logging
-import six
-import numpy as np
-from .... import core
-from ..core.strategy import Strategy
-from ....log_helper import get_logger
-
-__all__ = ['MKLDNNPostTrainingQuantStrategy']
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-class MKLDNNPostTrainingQuantStrategy(Strategy):
-    """
-    The strategy for MKL-DNN Post Training quantization strategy.
-    """
-
-    def __init__(self,
-                 int8_model_save_path=None,
-                 fp32_model_path=None,
-                 cpu_math_library_num_threads=1):
-        """
-        Args:
-            int8_model_save_path(str): int8_model_save_path is used to save an int8 ProgramDesc
-                        with fp32 weights which is used for MKL-DNN int8 inference. For post training quantization,
-                        MKLDNNPostTrainingQuantStrategy only supports converting a fp32 ProgramDesc
-                        with fp32 weights to an int8 ProgramDesc with fp32 weights now. The saved
-                        int8 ProgramDesc with fp32 weights only can be executed with MKL-DNN enabled.
-                        None means it doesn't save int8 ProgramDesc with fp32 weights. default: None.
-            fp32_model_path(str): fp32_model_path is used to load an original fp32 ProgramDesc with fp32 weights.
-                        None means it doesn't have a fp32 ProgramDesc with fp32 weights. default: None.
-            cpu_math_library_num_threads(int): The number of cpu math library threads which is used on
-                        MKLDNNPostTrainingQuantStrategy. 1 means it only uses one cpu math library
-                        thread. default: 1
-        """
-
-        super(MKLDNNPostTrainingQuantStrategy, self).__init__(0, 0)
-        self.int8_model_save_path = int8_model_save_path
-        if fp32_model_path is None:
-            raise Exception("fp32_model_path is None")
-        self.fp32_model_path = fp32_model_path
-        self.cpu_math_library_num_threads = cpu_math_library_num_threads
-
-    def on_compression_begin(self, context):
-        """
-        Prepare the data and quantify the model
-        """
-
-        super(MKLDNNPostTrainingQuantStrategy,
-              self).on_compression_begin(context)
-        _logger.info('InferQuantStrategy::on_compression_begin')
-
-        # Prepare the Analysis Config
-        infer_config = core.AnalysisConfig("AnalysisConfig")
-        infer_config.switch_ir_optim(True)
-        infer_config.disable_gpu()
-        infer_config.set_model(self.fp32_model_path)
-        infer_config.enable_mkldnn()
-        infer_config.set_cpu_math_library_num_threads(
-            self.cpu_math_library_num_threads)
-
-        # Prepare the data for calculating the quantization scales
-        warmup_reader = context.eval_reader()
-        if six.PY2:
-            data = warmup_reader.next()
-
-        if six.PY3:
-            data = warmup_reader.__next__()
-
-        num_images = len(data)
-        image_data = [img.tolist() for (img, _) in data]
-        image_data = np.array(image_data).astype("float32").reshape(
-            [num_images, ] + list(data[0][0].shape))
-        image_data = image_data.ravel()
-        images = core.PaddleTensor(image_data, "x")
-        images.shape = [num_images, ] + list(data[0][0].shape)
-
-        label_data = [label for (_, label) in data]
-        labels = core.PaddleTensor(
-            np.array(label_data).astype("int64").reshape([num_images, 1]), "y")
-
-        warmup_data = [images, labels]
-
-        # Enable the INT8 Quantization
-        infer_config.enable_quantizer()
-        infer_config.quantizer_config().set_quant_data(warmup_data)
-        infer_config.quantizer_config().set_quant_batch_size(num_images)
-
-        # Run INT8 MKL-DNN Quantization
-        predictor = core.create_paddle_predictor(infer_config)
-        if self.int8_model_save_path:
-            if not os.path.exists(self.int8_model_save_path):
-                os.makedirs(self.int8_model_save_path)
-            predictor.SaveOptimModel(self.int8_model_save_path)
-
-        _logger.info(
-            'Finish MKLDNNPostTrainingQuantStrategy::on_compresseion_begin')
--- a/python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quantization_strategy.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import sys
-import numpy as np
-from .... import Executor
-from .... import io
-from .... import core, scope_guard
-from ....compiler import CompiledProgram
-from ....compiler import BuildStrategy
-from ....framework import IrGraph, Variable, Program
-from ....log_helper import get_logger
-from ..core.strategy import Strategy
-from .quantization_pass import *
-
-__all__ = ['QuantizationStrategy']
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-class QuantizationStrategy(Strategy):
-    """
-    The strategy for Quantization.
-    """
-
-    def __init__(self,
-                 start_epoch=0,
-                 end_epoch=0,
-                 float_model_save_path=None,
-                 mobile_model_save_path=None,
-                 int8_model_save_path=None,
-                 activation_bits=8,
-                 weight_bits=8,
-                 activation_quantize_type='abs_max',
-                 weight_quantize_type='abs_max',
-                 save_in_nodes=None,
-                 save_out_nodes=None):
-        """
-        Args:
-            start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
-            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
-            float_model_save_path(str): The path to save model with float weights.
-                            None means it doesn't save float model. default: None.
-            mobile_model_save_path(str): The path to save model for paddle-mobile execution.
-                            None means it doesn't save mobile model. default: None.
-            int8_model_save_path(str): The path to save model with int8_t weight.
-                            None means it doesn't save int8 model. default: None.
-            activation_bits(int): quantization bit number for activation. default: 8.
-            weight_bits(int): quantization bit number for weights. The bias is not quantized.
-                              default: 8.
-            activation_quantize_type(str): quantization type for activation,
-                now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
-                If use 'abs_max' mode, the quantization scale will be calculated
-                dynamically each step in both training and testing period. If use
-                'range_abs_max', a static quantization scale will be calculated
-                during training and used in inference.
-            weight_quantize_type (str): quantization type for weights, support 'abs_max' and 'channel_wise_abs_max'.
-            The 'range_abs_max' usually is not used for weight, since weights are fixed once the model is well trained.
-            save_in_nodes(list<str>): A list of variable names used to prune graph
-                                      for saving inference model.
-            save_out_nodes(list<str>): A list of variable names used to prune graph
-                                      for saving inference model.
-
-        """
-        super(QuantizationStrategy, self).__init__(start_epoch, end_epoch)
-        self.start_epoch = start_epoch
-        self.end_epoch = end_epoch
-        self.float_model_save_path = float_model_save_path
-        self.mobile_model_save_path = mobile_model_save_path
-        self.int8_model_save_path = int8_model_save_path
-        self.activation_bits = activation_bits
-        self.weight_bits = weight_bits
-        self.activation_quantize_type = activation_quantize_type
-        self.weight_quantize_type = weight_quantize_type
-        self.save_out_nodes = save_out_nodes
-        self.save_in_nodes = save_in_nodes
-
-    def restore_from_checkpoint(self, context):
-        """
-        Restore graph when the compression task is inited from checkpoint.
-        """
-        # It is inited from checkpoint and has missed start epoch.
-        if context.epoch_id != 0 and context.epoch_id > self.start_epoch:
-            _logger.info("Restore quantization task from checkpoint")
-            self._modify_graph_for_quantization(context)
-            _logger.info("Finish restoring quantization task from checkpoint")
-
-    def _modify_graph_for_quantization(self, context):
-        """
-        Insert fake_quantize_op and fake_dequantize_op before training and testing.
-        """
-        train_ir_graph = IrGraph(
-            core.Graph(context.optimize_graph.program.clone().desc),
-            for_test=False)
-        test_ir_graph = IrGraph(
-            core.Graph(context.eval_graph.program.clone().desc), for_test=True)
-        transform_pass = QuantizationTransformPass(
-            scope=context.scope,
-            place=context.place,
-            weight_bits=self.weight_bits,
-            activation_bits=self.activation_bits,
-            activation_quantize_type=self.activation_quantize_type,
-            weight_quantize_type=self.weight_quantize_type)
-        transform_pass.apply(train_ir_graph)
-        transform_pass.apply(test_ir_graph)
-        # Put persistables created by transform_pass into context.optimize_graph.persistables
-        # for saving checkpoint.
-        program_persistables = set()
-        for var in context.optimize_graph.program.list_vars():
-            if var.persistable:
-                program_persistables.add(var.name)
-
-        program = Program()
-        for var_node in train_ir_graph.all_persistable_nodes():
-            if var_node.name() not in program_persistables:
-                var_desc = var_node.var()
-                var = program.global_block().create_var(
-                    name=var_node.name(),
-                    shape=var_desc.shape(),
-                    dtype=var_desc.dtype(),
-                    type=var_desc.type(),
-                    lod_level=var_desc.lod_level())
-                context.optimize_graph.persistables[var.name] = var
-
-        build_strategy = BuildStrategy()
-        build_strategy.enable_inplace = False
-        build_strategy.memory_optimize = False
-        build_strategy.fuse_all_reduce_ops = False
-        # for quantization training
-        context.optimize_graph.compiled_graph = CompiledProgram(
-            train_ir_graph.graph).with_data_parallel(
-                loss_name=context.optimize_graph.out_nodes['loss'],
-                build_strategy=build_strategy)
-
-        context.eval_graph.program = test_ir_graph.to_program()
-
-        # for saving inference model after training
-        context.put('quantization_test_ir_graph_backup', test_ir_graph)
-
-    def on_epoch_begin(self, context):
-        """
-        Insert fake_quantize_op and fake_dequantize_op before training and testing.
-        """
-        super(QuantizationStrategy, self).on_epoch_begin(context)
-        if self.start_epoch == context.epoch_id:
-            _logger.info('QuantizationStrategy::on_epoch_begin')
-            self._modify_graph_for_quantization(context)
-            _logger.info('Finish QuantizationStrategy::on_epoch_begin')
-
-    def on_epoch_end(self, context):
-        """
-        Free and save inference model.
-        """
-        super(QuantizationStrategy, self).on_compression_end(context)
-
-        if context.epoch_id == self.end_epoch:
-            _logger.info('QuantizationStrategy::on_epoch_end')
-            test_ir_graph = context.get('quantization_test_ir_graph_backup')
-            # freeze the graph after training
-            freeze_pass = QuantizationFreezePass(
-                scope=context.scope,
-                place=context.place,
-                weight_bits=self.weight_bits,
-                activation_bits=self.activation_bits,
-                weight_quantize_type=self.weight_quantize_type)
-            freeze_pass.apply(test_ir_graph)
-
-            # for other strategies
-            context.eval_graph.program = test_ir_graph.to_program()
-
-            if self.save_out_nodes == None:
-                out_vars = [
-                    context.eval_graph.var(var_name)._var
-                    for var_name in context.eval_graph.out_nodes.values()
-                ]
-            else:
-                out_vars = [
-                    context.eval_graph.var(var_name)._var
-                    for var_name in self.save_out_nodes
-                ]
-
-            if self.save_in_nodes == None:
-                in_vars = list(context.eval_graph.in_nodes.values())
-            else:
-                in_vars = self.save_in_nodes
-
-            # save float model
-            if self.float_model_save_path:
-                executor = Executor(context.place)
-                with scope_guard(context.scope):
-                    io.save_inference_model(
-                        self.float_model_save_path,
-                        in_vars,
-                        out_vars,
-                        executor,
-                        main_program=test_ir_graph.to_program(),
-                        model_filename='model',
-                        params_filename='weights',
-                        export_for_deployment=True)
-
-            # save int8 model
-            if self.int8_model_save_path:
-                convert_int8_pass = ConvertToInt8Pass(
-                    scope=context.scope, place=context.place)
-                convert_int8_pass.apply(test_ir_graph)
-
-                executor = Executor(context.place)
-
-                with scope_guard(context.scope):
-                    io.save_inference_model(
-                        self.int8_model_save_path,
-                        in_vars,
-                        out_vars,
-                        executor,
-                        main_program=test_ir_graph.to_program(),
-                        model_filename='model',
-                        params_filename='weights',
-                        export_for_deployment=True)
-
-            # save mobile model
-            if self.mobile_model_save_path:
-                if not self.int8_model_save_path:
-                    # convert the weights as int8_t type
-                    convert_int8_pass = ConvertToInt8Pass(
-                        scope=context.scope, place=context.place)
-                    convert_int8_pass.apply(test_ir_graph)
-                # make some changes on the graph for the mobile inference
-                mobile_pass = TransformForMobilePass()
-                mobile_pass.apply(test_ir_graph)
-                executor = Executor(context.place)
-                with scope_guard(context.scope):
-                    io.save_inference_model(
-                        self.mobile_model_save_path,
-                        in_vars,
-                        out_vars,
-                        executor,
-                        main_program=test_ir_graph.to_program(),
-                        model_filename='model',
-                        params_filename='weights',
-                        export_for_deployment=True)
-            _logger.info('Finish QuantizationStrategy::on_epoch_end')
--- a/python/paddle/fluid/contrib/slim/searcher/__init__.py
+++ b/python/paddle/fluid/contrib/slim/searcher/__init__.py
-#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from . import controller
-from .controller import *
-
-__all__ = controller.__all__
--- a/python/paddle/fluid/contrib/slim/searcher/controller.py
+++ b/python/paddle/fluid/contrib/slim/searcher/controller.py
-#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""The controller used to search hyperparameters or neural architecture"""
-
-import numpy as np
-import copy
-import math
-import logging
-from ....log_helper import get_logger
-
-__all__ = ['EvolutionaryController', 'SAController']
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-class EvolutionaryController(object):
-    """Abstract controller for all evolutionary searching method.
-    """
-
-    def __init__(self, *args, **kwargs):
-        pass
-
-    def update(self, tokens, reward):
-        """Update the status of controller according current tokens and reward.
-        Args:
-            tokens(list<int>): A solution of searching task.
-            reward(list<int>): The reward of tokens.
-        """
-        raise NotImplementedError('Abstract method.')
-
-    def reset(self, range_table, constrain_func=None):
-        """Reset the controller.
-        Args:
-            range_table(list<int>): It is used to define the searching space of controller.
-                                    The tokens[i] generated by controller should be in [0, range_table[i]).
-            constrain_func(function): It is used to check whether tokens meet the constraint.
-                                     None means there is no constraint. Default: None.
-        """
-        raise NotImplementedError('Abstract method.')
-
-    def next_tokens(self):
-        """Generate new tokens.
-        """
-        raise NotImplementedError('Abstract method.')
-
-
-class SAController(EvolutionaryController):
-    """Simulated annealing controller."""
-
-    def __init__(self,
-                 range_table=None,
-                 reduce_rate=0.85,
-                 init_temperature=1024,
-                 max_iter_number=300):
-        """Initialize.
-        Args:
-            range_table(list<int>): Range table.
-            reduce_rate(float): The decay rate of temperature.
-            init_temperature(float): Init temperature.
-            max_iter_number(int): max iteration number.
-        """
-        super(SAController, self).__init__()
-        self._range_table = range_table
-        self._reduce_rate = reduce_rate
-        self._init_temperature = init_temperature
-        self._max_iter_number = max_iter_number
-        self._reward = -1
-        self._tokens = None
-        self._max_reward = -1
-        self._best_tokens = None
-        self._iter = 0
-
-    def __getstate__(self):
-        d = {}
-        for key in self.__dict__:
-            if key != "_constrain_func":
-                d[key] = self.__dict__[key]
-        return d
-
-    def reset(self, range_table, init_tokens, constrain_func=None):
-        """
-        Reset the status of current controller.
-        Args:
-            range_table(list<int>): The range of value in each position of tokens generated by current controller. The range of tokens[i] is [0, range_table[i]).
-            init_tokens(list<int>): The initial tokens.
-            constrain_func(function): The callback function used to check whether the tokens meet constraint. None means there is no constraint. Default: None.
-        """
-        self._range_table = range_table
-        self._constrain_func = constrain_func
-        self._tokens = init_tokens
-        self._iter = 0
-
-    def update(self, tokens, reward):
-        """
-        Update the controller according to latest tokens and reward.
-        Args:
-            tokens(list<int>): The tokens generated in last step.
-            reward(float): The reward of tokens.
-        """
-        self._iter += 1
-        temperature = self._init_temperature * self._reduce_rate**self._iter
-        if (reward > self._reward) or (np.random.random() <= math.exp(
-            (reward - self._reward) / temperature)):
-            self._reward = reward
-            self._tokens = tokens
-        if reward > self._max_reward:
-            self._max_reward = reward
-            self._best_tokens = tokens
-        _logger.info("iter: {}; max_reward: {}; best_tokens: {}".format(
-            self._iter, self._max_reward, self._best_tokens))
-        _logger.info("current_reward: {}; current tokens: {}".format(
-            self._reward, self._tokens))
-
-    def next_tokens(self, control_token=None):
-        """
-        Get next tokens.
-        """
-        if control_token:
-            tokens = control_token[:]
-        else:
-            tokens = self._tokens
-        new_tokens = tokens[:]
-        index = int(len(self._range_table) * np.random.random())
-        new_tokens[index] = (
-            new_tokens[index] + np.random.randint(self._range_table[index] - 1)
-            + 1) % self._range_table[index]
-        _logger.info("change index[{}] from {} to {}".format(index, tokens[
-            index], new_tokens[index]))
-        if self._constrain_func is None:
-            return new_tokens
-        for _ in range(self._max_iter_number):
-            if not self._constrain_func(new_tokens):
-                index = int(len(self._range_table) * np.random.random())
-                new_tokens = tokens[:]
-                new_tokens[index] = np.random.randint(self._range_table[index])
-            else:
-                break
-        return new_tokens
--- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
@@ -138,41 +138,6 @@ if(LINUX AND WITH_MKLDNN)
 	# Models should be already downloaded for INT8v2 unit tests

 	set(INT8_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
-	set(INT8_IC_TEST_FILE "test_mkldnn_int8_quantization_strategy.py")
-	set(INT8_IC_TEST_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${INT8_IC_TEST_FILE}")
-
-	# googlenet int8
-	set(INT8_GOOGLENET_MODEL_DIR "${INT8_INSTALL_DIR}/googlenet")
-	inference_analysis_python_api_int8_test_custom_warmup_batch_size(test_slim_int8_googlenet ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH} 10)
-
-	# mobilenet int8
-	set(INT8_MOBILENET_MODEL_DIR "${INT8_INSTALL_DIR}/mobilenetv1")
-	inference_analysis_python_api_int8_test(test_slim_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
-	inference_analysis_python_api_int8_test_mkldnn(test_slim_int8_mobilenet_mkldnn ${INT8_MOBILENET_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
-
-	# temporarily adding WITH_SLIM_MKLDNN_FULL_TEST FLAG for QA testing the following UTs locally,
-	# since the following UTs cost too much time on CI test.
-	if (WITH_SLIM_MKLDNN_FULL_TEST)
-		# resnet50 int8
-		set(INT8_RESNET50_MODEL_DIR "${INT8_INSTALL_DIR}/resnet50")
-		inference_analysis_python_api_int8_test(test_slim_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
-
-		# mobilenetv2 int8
-		set(INT8_MOBILENETV2_MODEL_DIR "${INT8_INSTALL_DIR}/mobilenetv2")
-		inference_analysis_python_api_int8_test(test_slim_int8_mobilenetv2 ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
-
-		# resnet101 int8
-		set(INT8_RESNET101_MODEL_DIR "${INT8_INSTALL_DIR}/resnet101")
-		inference_analysis_python_api_int8_test(test_slim_int8_resnet101 ${INT8_RESNET101_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
-
-		# vgg16 int8
-		set(INT8_VGG16_MODEL_DIR "${INT8_INSTALL_DIR}/vgg16")
-		inference_analysis_python_api_int8_test(test_slim_int8_vgg16 ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
-
-		# vgg19 int8
-		set(INT8_VGG19_MODEL_DIR "${INT8_INSTALL_DIR}/vgg19")
-		inference_analysis_python_api_int8_test(test_slim_int8_vgg19 ${INT8_VGG19_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
-	endif()

 	#### QUANT & INT8 comparison python api tests


--- a/python/paddle/fluid/contrib/slim/tests/auto_pruning/compress.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/auto_pruning/compress.yaml
-version: 1.0
-pruners:
-    pruner_1:
-        class: 'StructurePruner'
-        pruning_axis:
-            '*': 0
-        criterions:
-            '*': 'l1_norm'
-controllers:
-    sa_controller:
-        class: 'SAController'
-        reduce_rate: 0.9
-        init_temperature: 1024
-        max_iter_number: 300
-strategies:
-    auto_pruning_strategy:
-        class: 'AutoPruneStrategy'
-        pruner: 'pruner_1'
-        controller: 'sa_controller'
-        start_epoch: 0
-        end_epoch: 2
-        max_ratio: 0.7
-        min_ratio: 0.5
-        pruned_params: '.*_sep_weights'
-        metric_name: 'acc_top5'
-compressor:
-    epoch: 2
-    checkpoint_path: './checkpoints_auto_pruning/'
-    strategies:
-        - auto_pruning_strategy
--- a/python/paddle/fluid/contrib/slim/tests/configs/compress.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/configs/compress.yaml
-version: 1.0
-compressor:
-    epoch: 1 
-    checkpoint_path: './checkpoints/'
--- a/python/paddle/fluid/contrib/slim/tests/configs/filter_pruning.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/configs/filter_pruning.yaml
-#start_epoch:         The 'on_epoch_begin' function will be called in start_epoch. default: 0.
-#end_epoch:           The 'on_epoch_end' function will be called in end_epoch. default: 10.
-#delta_rate:          The delta used to generate ratios when calculating sensitivities.
-#target_ratio:        The flops ratio to be pruned from current model.
-#metric_name:         The metric used to evaluate the model.
-#pruned_params:       The pattern str to match the parameter names to be pruned.
-#sensitivities_file:  The sensitivities file.
-#num_steps:           The number of pruning steps.
-#eval_rate:           The rate of sampled data used to calculate sensitivities.
-version: 1.0
-pruners:
-    pruner_1:
-        class: 'StructurePruner'
-        pruning_axis:
-            '*': 0
-        criterions:
-            '*': 'l1_norm'
-strategies:
-    sensitive_pruning_strategy:
-        class: 'SensitivePruneStrategy'
-        pruner: 'pruner_1'
-        start_epoch: 0
-        delta_rate: 0.1
-        target_ratio: 0.3
-        num_steps: 1
-        eval_rate: 0.5
-        pruned_params: '.*_sep_weights'
-        sensitivities_file: 'mobilenet_acc_top1_sensitive.data'
-        metric_name: 'acc_top1'
-compressor:
-    epoch: 120
-    checkpoint_path: './checkpoints/'
-    strategies:
-        - sensitive_pruning_strategy
--- a/python/paddle/fluid/contrib/slim/tests/distillation/compress.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/distillation/compress.yaml
-#start_epoch(int): The epoch when to merge student graph and teacher graph for
-#                  distillation training. default: 0
-#
-#end_epoch(int): The epoch when to finish distillation training. default: 0
-#
-#student_feature_map(str): The name of feature map from student network.
-#
-#teacher_feature_map(str): The name of feature map from teacher network.
-#                          It's shape should be the same with student network.
-#
-#student_pairs(list<tuple>): Each tuple, with two variable names, in student_pairs indicates
-#                            a section in student network. The variables in a tuple should
-#                            have the same feature map size.
-#
-#teacher_pairs(list<tuple>): Each tuple, with two variable names, in teacher_pairs indicates
-#                            a section in teacher network. The variables in a tuple should
-#                            have the same feature map size. Varibale named teacher_pairs[i][j]
-#                            should has the save channel number with that of variable named 
-#                            student_pairs[i][j].
-#
-#distillation_loss_weight(float): The weight of the loss.
-version: 1.0
-distillers:
-    fsp_distiller:
-        class: 'FSPDistiller'
-#        teacher_pairs: [['teacher_depthwise_conv2d_1.tmp_0', 'teacher_conv2d_3.tmp_0']]
-#        student_pairs: [['student_depthwise_conv2d_1.tmp_0', 'student_conv2d_3.tmp_0']]
-        teacher_pairs: [['teacher_conv2_1_dw.tmp_0', 'teacher_conv1.tmp_0']]
-        student_pairs: [['student_conv2_1_dw.tmp_0', 'student_conv1.tmp_0']]
-        distillation_loss_weight: 1
-    l2_distiller:
-        class: 'L2Distiller'
-        teacher_feature_map: 'teacher.tmp_1'
-        student_feature_map: 'student.tmp_1'
-        distillation_loss_weight: 1
-    soft_label_distiller:
-        class: 'SoftLabelDistiller'
-        student_temperature: 1.0
-        teacher_temperature: 1.0 
-        teacher_feature_map: 'teacher.tmp_2'
-        student_feature_map: 'student.tmp_2'
-        distillation_loss_weight: 0.001
-strategies:
-    distillation_strategy:
-        class: 'DistillationStrategy'
-        distillers: ['fsp_distiller', 'l2_distiller', 'soft_label_distiller']
-        start_epoch: 0
-        end_epoch: 1
-compressor:
-    epoch: 1
-    checkpoint_path: './distillation_checkpoints/'
-    strategies:
-        - distillation_strategy
--- a/python/paddle/fluid/contrib/slim/tests/filter_pruning/compress.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/filter_pruning/compress.yaml
-#start_epoch:         The 'on_epoch_begin' function will be called in start_epoch. default: 0.
-#end_epoch:           The 'on_epoch_end' function will be called in end_epoch. default: 10.
-#delta_rate:          The delta used to generate ratios when calculating sensitivities.
-#target_ratio:        The flops ratio to be pruned from current model.
-#metric_name:         The metric used to evaluate the model.
-#pruned_params:       The pattern str to match the parameter names to be pruned.
-#sensitivities_file:  The sensitivities file.
-#num_steps:           The number of pruning steps.
-#eval_rate:           The rate of sampled data used to calculate sensitivities.
-version: 1.0
-pruners:
-    pruner_1:
-        class: 'StructurePruner'
-        pruning_axis:
-            '*': 0
-        criterions:
-            '*': 'l1_norm'
-strategies:
-    sensitive_pruning_strategy:
-        class: 'SensitivePruneStrategy'
-        pruner: 'pruner_1'
-        start_epoch: 1
-        delta_rate: 0.2
-        target_ratio: 0.08
-        num_steps: 1
-        eval_rate: 0.5
-        pruned_params: '_conv6_sep_weights'
-        sensitivities_file: 'mobilenet_acc_top1_sensitive.data'
-        metric_name: 'acc_top1'
-compressor:
-    epoch: 2
-    checkpoint_path: './checkpoints_pruning/'
-    strategies:
-        - sensitive_pruning_strategy
--- a/python/paddle/fluid/contrib/slim/tests/filter_pruning/uniform_restore.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/filter_pruning/uniform_restore.yaml
-version: 1.0
-pruners:
-    pruner_1:
-        class: 'StructurePruner'
-        pruning_axis:
-            '*': 0
-        criterions:
-            '*': 'l1_norm'
-strategies:
-    uniform_pruning_strategy:
-        class: 'UniformPruneStrategy'
-        pruner: 'pruner_1'
-        start_epoch: 0
-        target_ratio: 0.5
-        pruned_params: 'conv.*'
-        metric_name: 'acc_top1'
-compressor:
-    epoch: 2
-    checkpoint_path: './checkpoints_uniform_restore_tmp/'
-    strategies:
-        - uniform_pruning_strategy
--- a/python/paddle/fluid/contrib/slim/tests/filter_pruning/uniform_restore_0.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/filter_pruning/uniform_restore_0.yaml
-version: 1.0
-pruners:
-    pruner_1:
-        class: 'StructurePruner'
-        pruning_axis:
-            '*': 0
-        criterions:
-            '*': 'l1_norm'
-strategies:
-    uniform_pruning_strategy:
-        class: 'UniformPruneStrategy'
-        pruner: 'pruner_1'
-        start_epoch: 0
-        target_ratio: 0.5
-        pruned_params: 'conv.*'
-        metric_name: 'acc_top1'
-compressor:
-    epoch: 1
-    checkpoint_path: './checkpoints_uniform_restore/'
-    strategies:
-        - uniform_pruning_strategy
--- a/python/paddle/fluid/contrib/slim/tests/filter_pruning/uniform_restore_1.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/filter_pruning/uniform_restore_1.yaml
-version: 1.0
-pruners:
-    pruner_1:
-        class: 'StructurePruner'
-        pruning_axis:
-            '*': 0
-        criterions:
-            '*': 'l1_norm'
-strategies:
-    uniform_pruning_strategy:
-        class: 'UniformPruneStrategy'
-        pruner: 'pruner_1'
-        start_epoch: 0
-        target_ratio: 0.5
-        pruned_params: 'conv.*'
-        metric_name: 'acc_top1'
-compressor:
-    epoch: 2
-    checkpoint_path: './checkpoints_uniform_restore/'
-    strategies:
-        - uniform_pruning_strategy
--- a/python/paddle/fluid/contrib/slim/tests/light_nas/compress.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/light_nas/compress.yaml
-version: 1.0
-controllers:
-    sa_controller:
-        class: 'SAController'
-        reduce_rate: 0.9
-        init_temperature: 1024
-        max_iter_number: 300
-strategies:
-    light_nas_strategy:
-        class: 'LightNASStrategy'
-        controller: 'sa_controller'
-        target_flops: 629145600
-        target_latency: 1
-        end_epoch: 2
-        retrain_epoch: 1
-        metric_name: 'acc_top1'
-        is_server: 1
-        max_client_num: 100
-        search_steps: 2
-compressor:
-    epoch: 2
-    strategies:
-        - light_nas_strategy
--- a/python/paddle/fluid/contrib/slim/tests/light_nas/light_nas_space.py
+++ b/python/paddle/fluid/contrib/slim/tests/light_nas/light_nas_space.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.fluid.contrib.slim.nas import SearchSpace
-from light_nasnet import LightNASNet
-import paddle.fluid as fluid
-import paddle
-import json
-import random
-
-# Training hyper-parameters. build_program() copies these into the model's
-# ``params`` dict before the optimizer is created; ``batch_size`` is also used
-# by LightNASSpace.create_net() to batch the MNIST readers.
-total_images = 1281167
-lr = 0.1
-num_epochs = 1
-batch_size = 256
-lr_strategy = "cosine_decay"
-l2_decay = 4e-5
-momentum_rate = 0.9
-# Per-sample image shape fed to the py_reader in build_program().
-image_shape = [1, 28, 28]
-
-__all__ = ['LightNASSpace']
-
-# Candidate tables indexed by tokens in get_bottleneck_params_list().
-# NAS_FILTER_SIZE and NAS_LAYERS_NUMBER hold one row per searched stage; the
-# remaining tables are shared by all stages.
-NAS_FILTER_SIZE = [[18, 24, 30], [24, 32, 40], [48, 64, 80], [72, 96, 120],
-                   [120, 160, 192]]
-NAS_LAYERS_NUMBER = [[1, 2, 3], [2, 3, 4], [3, 4, 5], [2, 3, 4], [2, 3, 4]]
-NAS_KERNEL_SIZE = [3, 5]
-NAS_FILTERS_MULTIPLIER = [3, 4, 5, 6]
-NAS_SHORTCUT = [0, 1]
-NAS_SE = [0, 1]
-
-
-def get_bottleneck_params_list(var):
-    """Decode search tokens into a flat list of bottleneck settings.
-
-    The result starts from a fixed table of seven stages with seven values
-    each, stored flat. For the second through sixth stage, every value except
-    the fourth slot is replaced by the candidate that ``var`` selects; the
-    first and last stages are returned unchanged.
-
-    Args:
-        var: list, six integer tokens per searched stage, in the order:
-            filters multiplier, filter size, layers number, kernel size,
-            shortcut flag, SE flag.
-    Returns:
-        list, bottleneck_params_list (49 flat values).
-    """
-    stage_rows = [
-        [1, 16, 1, 1, 3, 1, 0],
-        [6, 24, 2, 2, 3, 1, 0],
-        [6, 32, 3, 2, 3, 1, 0],
-        [6, 64, 4, 2, 3, 1, 0],
-        [6, 96, 3, 1, 3, 1, 0],
-        [6, 160, 3, 2, 3, 1, 0],
-        [6, 320, 1, 1, 3, 1, 0],
-    ]
-    params_list = [value for row in stage_rows for value in row]
-
-    for stage in range(5):
-        token_base = stage * 6
-        # Searched stages start at the second row of the table.
-        slot_base = (stage + 1) * 7
-        params_list[slot_base] = NAS_FILTERS_MULTIPLIER[var[token_base]]
-        params_list[slot_base + 1] = NAS_FILTER_SIZE[stage][var[token_base + 1]]
-        params_list[slot_base + 2] = NAS_LAYERS_NUMBER[stage][var[token_base + 2]]
-        # slot_base + 3 keeps its value from the base table.
-        params_list[slot_base + 4] = NAS_KERNEL_SIZE[var[token_base + 3]]
-        params_list[slot_base + 5] = NAS_SHORTCUT[var[token_base + 4]]
-        params_list[slot_base + 6] = NAS_SE[var[token_base + 5]]
-    return params_list
-
-
-class LightNASSpace(SearchSpace):
-    """Search space used by the light-NAS test; networks are fed MNIST batches."""
-
-    def __init__(self):
-        super(LightNASSpace, self).__init__()
-
-    def init_tokens(self):
-        """Return the initial tokens: six tokens for each of five stages."""
-        per_stage_tokens = [
-            [0, 1, 2, 0, 1, 0],
-            [0, 2, 1, 1, 1, 0],
-            [3, 2, 0, 1, 1, 0],
-            [3, 1, 0, 0, 1, 0],
-            [3, 2, 2, 1, 1, 0],
-        ]
-        return [token for stage in per_stage_tokens for token in stage]
-
-    def range_table(self):
-        """Return the number of candidates for every token position."""
-        # Per stage, in decoding order: [NAS_FILTERS_MULTIPLIER,
-        # NAS_FILTER_SIZE, NAS_LAYERS_NUMBER, NAS_KERNEL_SIZE, NAS_SHORTCUT,
-        # NAS_SE].
-        return [4, 3, 3, 2, 2, 2] * 5
-
-    def get_model_latency(self, program):
-        """Return a fake latency for ``program``.
-
-        The value is a random integer because this class only serves tests.
-        Args:
-            program(Program): The program to get latency (not inspected).
-        Return:
-            (int): either 1 or 2.
-        """
-        fake_latency = random.randint(1, 2)
-        return fake_latency
-
-    def create_net(self, tokens=None):
-        """Build the startup, train and test programs for ``tokens``.
-
-        Args:
-            tokens(list|None): Tokens to decode; ``init_tokens()`` is used
-                when None.
-        Return:
-            (tuple): startup program, train program, test program, train
-                variables (cost, acc1, acc5, global lr), test variables
-                (cost, acc1, acc5), train py_reader, test py_reader.
-        """
-        if tokens is None:
-            tokens = self.init_tokens()
-        stage_params = get_bottleneck_params_list(tokens)
-
-        startup_prog = fluid.Program()
-        train_prog = fluid.Program()
-        test_prog = fluid.Program()
-
-        (train_py_reader, train_cost, train_acc1, train_acc5,
-         global_lr) = build_program(
-             is_train=True,
-             main_prog=train_prog,
-             startup_prog=startup_prog,
-             bottleneck_params_list=stage_params)
-        test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
-            is_train=False,
-            main_prog=test_prog,
-            startup_prog=startup_prog,
-            bottleneck_params_list=stage_params)
-        test_prog = test_prog.clone(for_test=True)
-
-        # drop_last is only requested for the train reader.
-        train_reader = paddle.batch(
-            paddle.dataset.mnist.train(),
-            batch_size=batch_size / 1,
-            drop_last=True)
-        test_reader = paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=batch_size)
-
-        with fluid.program_guard(train_prog, startup_prog):
-            train_py_reader.decorate_paddle_reader(train_reader)
-        with fluid.program_guard(test_prog, startup_prog):
-            test_py_reader.decorate_paddle_reader(test_reader)
-
-        train_vars = (train_cost, train_acc1, train_acc5, global_lr)
-        test_vars = (test_cost, test_acc1, test_acc5)
-        return (startup_prog, train_prog, test_prog, train_vars, test_vars,
-                train_py_reader, test_py_reader)
-
-
-def build_program(is_train,
-                  main_prog,
-                  startup_prog,
-                  bottleneck_params_list=None):
-    """Populate ``main_prog`` with a LightNASNet classifier.
-
-    Args:
-        is_train: bool, when true an optimizer is attached to the loss.
-        main_prog: Program, receives the network.
-        startup_prog: Program, receives the initialization ops.
-        bottleneck_params_list: list, flat bottleneck settings forwarded to
-            net_config().
-    Returns:
-        tuple, (py_reader, avg_cost, acc_top1, acc_top5), followed by the
-        optimizer's global learning rate when ``is_train`` is true.
-    """
-    with fluid.program_guard(main_prog, startup_prog):
-        reader_options = dict(
-            capacity=16,
-            shapes=[[-1] + image_shape, [-1, 1]],
-            lod_levels=[0, 0],
-            dtypes=["float32", "int64"],
-            use_double_buffer=False)
-        py_reader = fluid.layers.py_reader(**reader_options)
-        outputs = (py_reader, )
-
-        with fluid.unique_name.guard():
-            image, label = fluid.layers.read_file(py_reader)
-            model = LightNASNet()
-            avg_cost, acc_top1, acc_top5 = net_config(
-                image,
-                label,
-                model,
-                class_dim=10,
-                bottleneck_params_list=bottleneck_params_list,
-                scale_loss=1.0)
-
-            for fetched in (avg_cost, acc_top1, acc_top5):
-                fetched.persistable = True
-            outputs += (avg_cost, acc_top1, acc_top5)
-
-            if is_train:
-                # Overwrite the model's default training settings with the
-                # module-level ones before building the optimizer.
-                params = model.params
-                params.update(
-                    total_images=total_images, lr=lr, num_epochs=num_epochs)
-                params["learning_strategy"].update(
-                    batch_size=batch_size, name=lr_strategy)
-                params.update(l2_decay=l2_decay, momentum_rate=momentum_rate)
-
-                optimizer = optimizer_setting(params)
-                optimizer.minimize(avg_cost)
-                outputs += (optimizer._global_learning_rate(), )
-
-        return outputs
-
-
-def net_config(image,
-               label,
-               model,
-               class_dim=1000,
-               bottleneck_params_list=None,
-               scale_loss=1.0):
-    """Attach loss and accuracy ops to ``model`` applied on ``image``.
-
-    Args:
-        image: Variable, network input.
-        label: Variable, ground-truth labels.
-        model: object exposing ``net(input, bottleneck_params_list, class_dim)``.
-        class_dim: int, number of classes.
-        bottleneck_params_list: list, flat settings; regrouped here into
-            chunks of seven values before being passed to the model.
-        scale_loss: float, the mean loss is multiplied by it only when it is
-            greater than 1.
-    Returns:
-        tuple, (avg_cost, acc_top1, acc_top5).
-    """
-    flat_params = bottleneck_params_list
-    grouped_params = []
-    for start in range(0, len(flat_params), 7):
-        grouped_params.append(flat_params[start:start + 7])
-
-    logits = model.net(input=image,
-                       bottleneck_params_list=grouped_params,
-                       class_dim=class_dim)
-    cost, pred = fluid.layers.softmax_with_cross_entropy(
-        logits, label, return_softmax=True)
-
-    avg_cost = fluid.layers.mean(x=cost)
-    if scale_loss > 1:
-        avg_cost = avg_cost * float(scale_loss)
-
-    accuracies = [
-        fluid.layers.accuracy(input=pred, label=label, k=top_k)
-        for top_k in (1, 5)
-    ]
-    return avg_cost, accuracies[0], accuracies[1]
-
-
-def optimizer_setting(params):
-    """Create the optimizer described by ``params``.
-
-    Args:
-        params: dict, must contain "learning_strategy" (a dict with a "name"
-            entry), "l2_decay" and "momentum_rate". Depending on the
-            strategy, "lr", "num_epochs", "total_images" and the strategy's
-            "batch_size" / "epochs" entries are read as well.
-    Returns:
-        An Adam optimizer when the strategy name is "adam", otherwise a
-        Momentum optimizer with L2 regularization. Unknown strategy names
-        fall back to Momentum with the constant learning rate params["lr"].
-    """
-    # Local import: this module does not import math at file level, so the
-    # "cosine_warmup_decay" branch used to fail with a NameError.
-    import math
-
-    # Fallback used when "total_images" is missing. It replaces a reference
-    # to an undefined ``IMAGENET1000`` name and equals this module's own
-    # ``total_images`` setting.
-    default_total_images = 1281167
-
-    ls = params["learning_strategy"]
-    l2_decay = params["l2_decay"]
-    momentum_rate = params["momentum_rate"]
-    strategy = ls["name"]
-
-    def _momentum(learning_rate):
-        # All Momentum variants differ only in their learning rate.
-        return fluid.optimizer.Momentum(
-            learning_rate=learning_rate,
-            momentum=momentum_rate,
-            regularization=fluid.regularizer.L2Decay(l2_decay))
-
-    if strategy == "piecewise_decay":
-        total_images = params.get("total_images", default_total_images)
-        batch_size = ls["batch_size"]
-        step = int(total_images / batch_size + 1)
-        bd = [step * e for e in ls["epochs"]]
-        base_lr = params["lr"]
-        # One more value than boundaries, each ten times smaller.
-        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
-        return _momentum(
-            fluid.layers.piecewise_decay(
-                boundaries=bd, values=lr))
-
-    if strategy == "cosine_decay":
-        total_images = params.get("total_images", default_total_images)
-        batch_size = ls["batch_size"]
-        step = int(total_images / batch_size + 1)
-        lr = params["lr"]
-        num_epochs = params["num_epochs"]
-        return _momentum(
-            fluid.layers.cosine_decay(
-                learning_rate=lr, step_each_epoch=step, epochs=num_epochs))
-
-    if strategy == "cosine_warmup_decay":
-        total_images = params.get("total_images", default_total_images)
-        batch_size = ls["batch_size"]
-        step = int(math.ceil(float(total_images) / batch_size))
-        lr = params["lr"]
-        num_epochs = params["num_epochs"]
-        # NOTE(review): cosine_decay_with_warmup is neither defined nor
-        # imported in this module, so this branch still raises NameError
-        # until the helper is provided.
-        return _momentum(
-            cosine_decay_with_warmup(
-                learning_rate=lr, step_each_epoch=step, epochs=num_epochs))
-
-    if strategy == "linear_decay":
-        total_images = params.get("total_images", default_total_images)
-        batch_size = ls["batch_size"]
-        num_epochs = params["num_epochs"]
-        start_lr = params["lr"]
-        end_lr = 0
-        total_step = int((total_images / batch_size) * num_epochs)
-        return _momentum(
-            fluid.layers.polynomial_decay(
-                start_lr, total_step, end_lr, power=1))
-
-    if strategy == "adam":
-        return fluid.optimizer.Adam(learning_rate=params["lr"])
-
-    return _momentum(params["lr"])
--- a/python/paddle/fluid/contrib/slim/tests/light_nas/light_nasnet.py
+++ b/python/paddle/fluid/contrib/slim/tests/light_nas/light_nasnet.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""LightNASNet."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-
-__all__ = ['LightNASNet']
-
-# Default training settings; LightNASNet.__init__ exposes them through the
-# instance's ``params`` attribute.
-train_parameters = {
-    "input_size": [3, 224, 224],
-    "input_mean": [0.485, 0.456, 0.406],
-    "input_std": [0.229, 0.224, 0.225],
-    "learning_strategy": {
-        "name": "piecewise_decay",
-        "batch_size": 256,
-        "epochs": [30, 60, 90],
-        "steps": [0.1, 0.01, 0.001, 0.0001]
-    }
-}
-
-
-class LightNASNet(object):
-    """LightNASNet: a stack of inverted residual blocks built from a table."""
-
-    def __init__(self):
-        # Deep-copy the defaults: users of this class write into ``params``
-        # (including the nested "learning_strategy" dict) in place, which
-        # would otherwise modify the module-level defaults for every instance.
-        import copy
-        self.params = copy.deepcopy(train_parameters)
-
-    def net(self, input, bottleneck_params_list=None, class_dim=1000,
-            scale=1.0):
-        """Build network.
-        Args:
-            input: Variable, input.
-            bottleneck_params_list: list, one 7-item setting per block group,
-                unpacked as (expansion, channels, number of layers, stride,
-                filter size, shortcut flag, SE flag). A built-in table is
-                used when it is None.
-            class_dim: int, class dim.
-            scale: float, scale applied to the channel numbers.
-        Returns:
-            Variable, network output.
-        """
-        if bottleneck_params_list is None:
-            # MobileNetV2
-            # bottleneck_params_list = [
-            #     (1, 16, 1, 1, 3, 1, 0),
-            #     (6, 24, 2, 2, 3, 1, 0),
-            #     (6, 32, 3, 2, 3, 1, 0),
-            #     (6, 64, 4, 2, 3, 1, 0),
-            #     (6, 96, 3, 1, 3, 1, 0),
-            #     (6, 160, 3, 2, 3, 1, 0),
-            #     (6, 320, 1, 1, 3, 1, 0),
-            # ]
-            bottleneck_params_list = [
-                (1, 16, 1, 1, 3, 1, 0),
-                (3, 24, 3, 2, 3, 1, 0),
-                (3, 40, 3, 2, 5, 1, 0),
-                (6, 80, 3, 2, 5, 1, 0),
-                (6, 96, 2, 1, 3, 1, 0),
-                (6, 192, 4, 2, 5, 1, 0),
-                (6, 320, 1, 1, 3, 1, 0),
-            ]
-
-        # conv1
-        input = self.conv_bn_layer(
-            input,
-            num_filters=int(32 * scale),
-            filter_size=3,
-            stride=2,
-            padding=1,
-            if_act=True,
-            name='conv1_1')
-
-        # bottleneck sequences, named conv2, conv3, ...
-        in_c = int(32 * scale)
-        for index, layer_setting in enumerate(bottleneck_params_list, 2):
-            t, c, n, s, k, ifshortcut, ifse = layer_setting
-            out_c = int(c * scale)
-            input = self.invresi_blocks(
-                input=input,
-                in_channel=in_c,
-                expansion=t,
-                out_channel=out_c,
-                num_layers=n,
-                stride=s,
-                filter_size=k,
-                shortcut=ifshortcut,
-                squeeze=ifse,
-                name='conv' + str(index))
-            in_c = out_c
-
-        # last_conv: the width is only scaled when scale is above 1.0
-        input = self.conv_bn_layer(
-            input=input,
-            num_filters=int(1280 * scale) if scale > 1.0 else 1280,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            if_act=True,
-            name='conv9')
-
-        input = fluid.layers.pool2d(
-            input=input,
-            pool_size=7,
-            pool_stride=1,
-            pool_type='avg',
-            global_pooling=True)
-
-        output = fluid.layers.fc(input=input,
-                                 size=class_dim,
-                                 param_attr=ParamAttr(name='fc10_weights'),
-                                 bias_attr=ParamAttr(name='fc10_offset'))
-        return output
-
-    def conv_bn_layer(self,
-                      input,
-                      filter_size,
-                      num_filters,
-                      stride,
-                      padding,
-                      num_groups=1,
-                      if_act=True,
-                      name=None,
-                      use_cudnn=True):
-        """Build convolution and batch normalization layers.
-        Args:
-            input: Variable, input.
-            filter_size: int, filter size.
-            num_filters: int, number of filters.
-            stride: int, stride.
-            padding: int, padding.
-            num_groups: int, number of groups.
-            if_act: bool, whether to apply relu6 after batch normalization.
-            name: str, prefix of the parameter names; it is concatenated
-                with suffixes, so a string must be given.
-            use_cudnn: bool, whether use cudnn.
-        Returns:
-            Variable, layers output.
-        """
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            groups=num_groups,
-            act=None,
-            use_cudnn=use_cudnn,
-            param_attr=ParamAttr(name=name + '_weights'),
-            bias_attr=False)
-        bn_name = name + '_bn'
-        bn = fluid.layers.batch_norm(
-            input=conv,
-            param_attr=ParamAttr(name=bn_name + "_scale"),
-            bias_attr=ParamAttr(name=bn_name + "_offset"),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-        if if_act:
-            return fluid.layers.relu6(bn)
-        return bn
-
-    def shortcut(self, input, data_residual):
-        """Build shortcut layer.
-        Args:
-            input: Variable, input.
-            data_residual: Variable, residual layer.
-        Returns:
-            Variable, element-wise sum of both arguments.
-        """
-        return fluid.layers.elementwise_add(input, data_residual)
-
-    def squeeze_excitation(self,
-                           input,
-                           num_channels,
-                           reduction_ratio,
-                           name=None):
-        """Build squeeze excitation layers.
-        Args:
-            input: Variable, input.
-            num_channels: int, number of channels.
-            reduction_ratio: int, the squeeze layer has
-                num_channels // reduction_ratio units.
-            name: str, prefix of the parameter names; a string must be given.
-        Returns:
-            Variable, the input multiplied by the excitation weights.
-        """
-        pool = fluid.layers.pool2d(
-            input=input, pool_size=0, pool_type='avg', global_pooling=True)
-        # Both fc layers use a uniform initializer bounded by 1 / sqrt(width
-        # of their input).
-        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
-        squeeze = fluid.layers.fc(
-            input=pool,
-            size=num_channels // reduction_ratio,
-            act='relu',
-            param_attr=fluid.param_attr.ParamAttr(
-                initializer=fluid.initializer.Uniform(-stdv, stdv),
-                name=name + '_sqz_weights'),
-            bias_attr=ParamAttr(name=name + '_sqz_offset'))
-        stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
-        excitation = fluid.layers.fc(
-            input=squeeze,
-            size=num_channels,
-            act='sigmoid',
-            param_attr=fluid.param_attr.ParamAttr(
-                initializer=fluid.initializer.Uniform(-stdv, stdv),
-                name=name + '_exc_weights'),
-            bias_attr=ParamAttr(name=name + '_exc_offset'))
-        scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
-        return scale
-
-    def inverted_residual_unit(self,
-                               input,
-                               num_in_filter,
-                               num_filters,
-                               ifshortcut,
-                               ifse,
-                               stride,
-                               filter_size,
-                               expansion_factor,
-                               reduction_ratio=4,
-                               name=None):
-        """Build inverted residual unit.
-        Args:
-            input(Variable): The input.
-            num_in_filter(int): The number of input filters.
-            num_filters(int): The number of filters.
-            ifshortcut(bool): Whether to use shortcut.
-            ifse(bool): Whether to add a squeeze excitation branch.
-            stride(int): The stride.
-            filter_size(int): The filter size of the depthwise convolution.
-            expansion_factor(float): Expansion factor.
-            reduction_ratio(int): Reduction ratio of the squeeze excitation
-                branch.
-            name(str): The name prefix; a string must be given.
-        Returns:
-            Variable, layers output.
-        """
-        num_expfilter = int(round(num_in_filter * expansion_factor))
-        channel_expand = self.conv_bn_layer(
-            input=input,
-            num_filters=num_expfilter,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            num_groups=1,
-            if_act=True,
-            name=name + '_expand')
-
-        # Depthwise convolution: one group per expanded channel.
-        bottleneck_conv = self.conv_bn_layer(
-            input=channel_expand,
-            num_filters=num_expfilter,
-            filter_size=filter_size,
-            stride=stride,
-            padding=int((filter_size - 1) / 2),
-            num_groups=num_expfilter,
-            if_act=True,
-            name=name + '_dwise',
-            use_cudnn=False)
-
-        linear_out = self.conv_bn_layer(
-            input=bottleneck_conv,
-            num_filters=num_filters,
-            filter_size=1,
-            stride=1,
-            padding=0,
-            num_groups=1,
-            if_act=False,
-            name=name + '_linear')
-        out = linear_out
-        if ifshortcut:
-            out = self.shortcut(input=input, data_residual=out)
-        if ifse:
-            # The SE branch is computed from the linear output and added to
-            # the (possibly shortcut-augmented) result.
-            scale = self.squeeze_excitation(
-                input=linear_out,
-                num_channels=num_filters,
-                reduction_ratio=reduction_ratio,
-                name=name + '_fc')
-            out = fluid.layers.elementwise_add(x=out, y=scale, act='relu')
-        return out
-
-    def invresi_blocks(self,
-                       input,
-                       in_channel,
-                       expansion,
-                       out_channel,
-                       num_layers,
-                       stride,
-                       filter_size,
-                       shortcut,
-                       squeeze,
-                       name=None):
-        """Build inverted residual blocks.
-        Args:
-            input(Variable): The input feature map.
-            in_channel(int): The number of input channel.
-            expansion(float): Expansion factor.
-            out_channel(int): The number of output channel.
-            num_layers(int): The number of layers.
-            stride(int): The stride of the first unit; later units use 1.
-            filter_size(int): The size of filter.
-            shortcut(bool): Whether to add shortcut layers (never applied to
-                the first unit).
-            squeeze(bool): Whether to add squeeze excitation layers.
-            name(str): The name prefix; a string must be given.
-        Returns:
-            Variable, layers output.
-        """
-        last_residual_block = self.inverted_residual_unit(
-            input=input,
-            num_in_filter=in_channel,
-            num_filters=out_channel,
-            ifshortcut=False,
-            ifse=squeeze,
-            stride=stride,
-            filter_size=filter_size,
-            expansion_factor=expansion,
-            name=name + '_1')
-
-        # Remaining units map out_channel -> out_channel, named _2, _3, ...
-        for i in range(1, num_layers):
-            last_residual_block = self.inverted_residual_unit(
-                input=last_residual_block,
-                num_in_filter=out_channel,
-                num_filters=out_channel,
-                ifshortcut=shortcut,
-                ifse=squeeze,
-                stride=1,
-                filter_size=filter_size,
-                expansion_factor=expansion,
-                name=name + '_' + str(i + 1))
-        return last_residual_block
--- a/python/paddle/fluid/contrib/slim/tests/mobilenet.py
+++ b/python/paddle/fluid/contrib/slim/tests/mobilenet.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import paddle.fluid as fluid
-from paddle.fluid.initializer import MSRA
-from paddle.fluid.param_attr import ParamAttr
-
-__all__ = ['MobileNet']
-
-# Default training settings; MobileNet.__init__ exposes them through the
-# instance's ``params`` attribute.
-train_parameters = {
-    "input_size": [3, 224, 224],
-    "input_mean": [0.485, 0.456, 0.406],
-    "input_std": [0.229, 0.224, 0.225],
-    "learning_strategy": {
-        "name": "piecewise_decay",
-        "batch_size": 256,
-        "epochs": [30, 60, 90],
-        "steps": [0.1, 0.01, 0.001, 0.0001]
-    }
-}
-
-
-class MobileNet():
-    def __init__(self, name=""):
-        """Store the name prefix and a private copy of the default settings.
-
-        Args:
-            name(str): Prefix prepended to every layer and parameter name.
-        """
-        # Deep-copy so that in-place edits of ``params`` (including the
-        # nested "learning_strategy" dict) stay local to this instance
-        # instead of changing the module-level defaults.
-        import copy
-        self.params = copy.deepcopy(train_parameters)
-        self.name = name
-
-    def net(self, input, class_dim=1000, scale=1.0):
-        """Build the MobileNet classifier on top of ``input``.
-
-        Args:
-            input: Variable, network input.
-            class_dim: int, size of the final softmax layer.
-            scale: float, multiplier applied to the channel numbers.
-        Returns:
-            Variable, output of the final fully connected softmax layer.
-        """
-        # conv1: 112x112
-        input = self.conv_bn_layer(
-            input,
-            filter_size=3,
-            channels=3,
-            num_filters=int(32 * scale),
-            stride=2,
-            padding=1,
-            name=self.name + "_conv1")
-
-        # One row per depthwise separable block, in build order:
-        # (num_filters1, num_filters2, num_groups, stride, name suffix).
-        block_settings = [
-            # 56x56
-            (32, 64, 32, 1, "_conv2_1"),
-            (64, 128, 64, 2, "_conv2_2"),
-            # 28x28
-            (128, 128, 128, 1, "_conv3_1"),
-            (128, 256, 128, 2, "_conv3_2"),
-            # 14x14
-            (256, 256, 256, 1, "_conv4_1"),
-            (256, 512, 256, 2, "_conv4_2"),
-        ]
-        # 14x14
-        for i in range(5):
-            block_settings.append(
-                (512, 512, 512, 1, "_conv5" + "_" + str(i + 1)))
-        # 7x7
-        block_settings.append((512, 1024, 512, 2, "_conv5_6"))
-        block_settings.append((1024, 1024, 1024, 1, "_conv6"))
-
-        for filters_in, filters_out, groups, block_stride, suffix in \
-                block_settings:
-            input = self.depthwise_separable(
-                input,
-                num_filters1=filters_in,
-                num_filters2=filters_out,
-                num_groups=groups,
-                stride=block_stride,
-                scale=scale,
-                name=self.name + suffix)
-
-        pooled = fluid.layers.pool2d(
-            input=input,
-            pool_size=0,
-            pool_stride=1,
-            pool_type='avg',
-            global_pooling=True)
-
-        fc_weights = ParamAttr(
-            initializer=MSRA(), name=self.name + "_fc7_weights")
-        fc_bias = ParamAttr(name=self.name + "_fc7_offset")
-        return fluid.layers.fc(
-            input=pooled,
-            size=class_dim,
-            act='softmax',
-            param_attr=fc_weights,
-            bias_attr=fc_bias,
-            name=self.name)
-
-    def conv_bn_layer(self,
-                      input,
-                      filter_size,
-                      num_filters,
-                      stride,
-                      padding,
-                      channels=None,
-                      num_groups=1,
-                      act='relu',
-                      use_cudnn=True,
-                      name=None):
-        conv = fluid.layers.conv2d(
-            input=input,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=padding,
-            groups=num_groups,
-            act=None,
-            use_cudnn=use_cudnn,
-            param_attr=ParamAttr(
-                initializer=MSRA(), name=name + "_weights"),
-            name=name,
-            bias_attr=False)
-        bn_name = name + "_bn"
-        return fluid.layers.batch_norm(
-            input=conv,
-            act=act,
-            name=name,
-            param_attr=ParamAttr(name=bn_name + "_scale"),
-            bias_attr=ParamAttr(name=bn_name + "_offset"),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-
-    def depthwise_separable(self,
-                            input,
-                            num_filters1,
-                            num_filters2,
-                            num_groups,
-                            stride,
-                            scale,
-                            name=None):
-        depthwise_conv = self.conv_bn_layer(
-            input=input,
-            filter_size=3,
-            num_filters=int(num_filters1 * scale),
-            stride=stride,
-            padding=1,
-            num_groups=int(num_groups * scale),
-            use_cudnn=False,
-            name=name + "_dw")
-
-        pointwise_conv = self.conv_bn_layer(
-            input=depthwise_conv,
-            filter_size=1,
-            num_filters=int(num_filters2 * scale),
-            stride=1,
-            padding=0,
-            name=name + "_sep")
-        return pointwise_conv
--- a/python/paddle/fluid/contrib/slim/tests/quantization/compress.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/quantization/compress.yaml
-#start_epoch(int): The epoch to insert quantization operators. default: 0
-#
-#end_epoch(int): The epoch to save inference model. default: 0
-#
-#float_model_save_path(str): The path to save model with float weights.
-#                None means it doesn't save float model. default: None.
-#
-#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
-#                None means it doesn't save mobile model. default: None.
-#
-#int8_model_save_path(str): The path to save model with int8_t weight.
-#                None means it doesn't save int8 model. default: None.
-#
-#activation_bits(int): quantization bit number for activation. default: 8.
-#
-#weight_bits(int): quantization bit number for weights. The bias is not quantized.
-#                  default: 8.
-#
-#activation_quantize_type(str): quantization type for activation,
-#    now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
-#    If use 'abs_max' mode, the quantization scale will be calculated
-#    dynamically each step in both training and testing period. If use
-#    'range_abs_max', a static quantization scale will be calculated
-#    during training and used in inference.
-#
-#save_in_nodes(list<str>): A list of variable names used to prune graph
-#                          for saving inference model.
-#
-#save_out_nodes(list<str>): A list of variable names used to prune graph
-#                                      for saving inference model.
-version: 1.0
-strategies:
-    quantization_strategy:
-        class: 'QuantizationStrategy'
-        start_epoch: 0
-        end_epoch: 0
-        float_model_save_path: './output/float'
-        mobile_model_save_path: './output/mobile'
-        int8_model_save_path: './output/int8'
-        weight_bits: 8
-        activation_bits: 8
-        weight_quantize_type: 'abs_max'
-        activation_quantize_type: 'abs_max'
-        save_in_nodes: ['image']
-        save_out_nodes: ['quan.tmp_2']
-compressor:
-    epoch: 1
-    checkpoint_path: './checkpoints_quan/'
-    strategies:
-        - quantization_strategy
--- a/python/paddle/fluid/contrib/slim/tests/quantization/compress_1.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/quantization/compress_1.yaml
-#start_epoch(int): The epoch to insert quantization operators. default: 0
-#
-#end_epoch(int): The epoch to save inference model. default: 0
-#
-#float_model_save_path(str): The path to save model with float weights.
-#                None means it doesn't save float model. default: None.
-#
-#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
-#                None means it doesn't save mobile model. default: None.
-#
-#int8_model_save_path(str): The path to save model with int8_t weight.
-#                None means it doesn't save int8 model. default: None.
-#
-#activation_bits(int): quantization bit number for activation. default: 8.
-#
-#weight_bits(int): quantization bit number for weights. The bias is not quantized.
-#                  default: 8.
-#
-#activation_quantize_type(str): quantization type for activation,
-#    now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
-#    If use 'abs_max' mode, the quantization scale will be calculated
-#    dynamically each step in both training and testing period. If use
-#    'range_abs_max', a static quantization scale will be calculated
-#    during training and used in inference.
-#
-#save_in_nodes(list<str>): A list of variable names used to prune graph
-#                          for saving inference model.
-#
-#save_out_nodes(list<str>): A list of variable names used to prune graph
-#                                      for saving inference model.
-version: 1.0
-strategies:
-    quantization_strategy:
-        class: 'QuantizationStrategy'
-        start_epoch: 0
-        end_epoch: 0
-        float_model_save_path: './output/float'
-        mobile_model_save_path: './output/mobile'
-        int8_model_save_path: './output/int8'
-        weight_bits: 8
-        activation_bits: 8
-        weight_quantize_type: 'abs_max'
-        activation_quantize_type: 'abs_max'
-        save_in_nodes: ['image']
-        save_out_nodes: ['quan.tmp_2']
-compressor:
-    epoch: 2
-    checkpoint_path: './checkpoints_quan/'
-    strategies:
-        - quantization_strategy
--- a/python/paddle/fluid/contrib/slim/tests/quantization/compress_2.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/quantization/compress_2.yaml
-#start_epoch(int): The epoch to insert quantization operators. default: 0
-#
-#end_epoch(int): The epoch to save inference model. default: 0
-#
-#float_model_save_path(str): The path to save model with float weights.
-#                None means it doesn't save float model. default: None.
-#
-#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
-#                None means it doesn't save mobile model. default: None.
-#
-#int8_model_save_path(str): The path to save model with int8_t weight.
-#                None means it doesn't save int8 model. default: None.
-#
-#activation_bits(int): quantization bit number for activation. default: 8.
-#
-#weight_bits(int): quantization bit number for weights. The bias is not quantized.
-#                  default: 8.
-#
-#activation_quantize_type(str): quantization type for activation,
-#    now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
-#    If use 'abs_max' mode, the quantization scale will be calculated
-#    dynamically each step in both training and testing period. If use
-#    'range_abs_max', a static quantization scale will be calculated
-#    during training and used in inference.
-#
-#save_in_nodes(list<str>): A list of variable names used to prune graph
-#                          for saving inference model.
-#
-#save_out_nodes(list<str>): A list of variable names used to prune graph
-#                                      for saving inference model.
-version: 1.0
-strategies:
-    quantization_strategy:
-        class: 'QuantizationStrategy'
-        start_epoch: 0
-        end_epoch: 0
-        float_model_save_path: './output/float'
-        mobile_model_save_path: './output/mobile'
-        int8_model_save_path: './output/int8'
-        weight_bits: 8
-        activation_bits: 8
-        weight_quantize_type: 'abs_max'
-        activation_quantize_type: 'abs_max'
-        save_in_nodes: ['image']
-        save_out_nodes: ['quan.tmp_2']
-compressor:
-    epoch: 2
-    checkpoint_path: './checkpoints_quan_2/'
-    strategies:
-        - quantization_strategy
--- a/python/paddle/fluid/contrib/slim/tests/quantization/config_mkldnn_int8.yaml
+++ b/python/paddle/fluid/contrib/slim/tests/quantization/config_mkldnn_int8.yaml
-#int8_model_save_path(str): int8_model_save_path is used to save an int8 ProgramDesc with
-#                fp32 weights which is used for MKL-DNN int8 inference. For post training quantization,
-#                MKLDNNPostTrainingQuantStrategy only supports converting a fp32 ProgramDesc
-#                with fp32 weights to an int8 ProgramDesc with fp32 weights now. The saved
-#                int8 ProgramDesc with fp32 weights only can be executed with MKL-DNN enabled.
-#                None means it doesn't save int8 ProgramDesc with fp32 weights. default: None.
-#
-#fp32_model_path(str): fp32_model_path is used to load an original fp32 ProgramDesc with fp32 weights.
-#                None means it doesn't have a fp32 ProgramDesc with fp32 weights. default: None.
-#
-#cpu_math_library_num_threads(int): The number of cpu math library threads which is used on
-#                MKLDNNPostTrainingQuantStrategy. 1 means it only uses one cpu math library
-#                thread. default: 1
-#                Note: Here we set the cpu_math_library_num_threads to 4 which is the maximum number of
-#                cpu math library threads on CI machine.
-#
-version: 1.0
-strategies:
-    mkldnn_post_training_strategy:
-        class: 'MKLDNNPostTrainingQuantStrategy'
-        int8_model_save_path: 'OUTPUT_PATH'
-        fp32_model_path: 'MODEL_PATH'
-        cpu_math_library_num_threads: 4
-compressor:
-    epoch: 0
-    checkpoint_path: ''
-    strategies:
-        - mkldnn_post_training_strategy
--- a/python/paddle/fluid/contrib/slim/tests/slim_int8_mkldnn_post_training_quantization.md
+++ b/python/paddle/fluid/contrib/slim/tests/slim_int8_mkldnn_post_training_quantization.md
-# PaddleSlim Post-training quantization (MKL-DNN INT8)
-
-This document describes how to use [PaddleSlim](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) to convert a FP32 ProgramDesc with FP32 weights to an INT8 ProgramDesc with FP32 weights on GoogleNet, MobileNet-V1, MobileNet-V2, ResNet-101, ResNet-50, VGG16 and VGG19. We provide the instructions on how to enable MKL-DNN INT8 calibration in PaddleSlim and show the results of accuracy on all the 7 models as mentioned.
-
-## 0. Prerequisite
-
-You need to install at least PaddlePaddle-1.5 python package `pip install paddlepaddle==1.5`.
-
-## 1. How to generate INT8 ProgramDesc with FP32 weights
-
-You can refer to the usage doc of [PaddleSlim](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) in section 1.2 for details that how to use PaddleSlim Compressor. But for PaddleSlim Post-training quantization with MKL-DNN INT8, there are two differences.
-
-* Differences in `paddle.fluid.contrib.slim.Compressor` arguments
-
-Since the only one requirement in PaddleSlim Post-training quantization with MKL-DNN INT8 is the reader of warmup dataset, so you need to set other parameters of `paddle.fluid.contrib.slim.Compressor` to None, [] or ''.
-
-```python
-com_pass = Compressor(
-    place=None, # not required, set to None
-    scope=None, # not required, set to None
-    train_program=None, # not required, set to None
-    train_reader=None, # not required, set to None
-    train_feed_list=[], # not required, set to []
-    train_fetch_list=[], # not required, set to []
-    eval_program=None, # not required, set to None
-    eval_reader=reader, # required, the reader of warmup dataset
-    eval_feed_list=[], # not required, set to []
-    eval_fetch_list=[], # not required, set to []
-    teacher_programs=[], # not required, set to []
-    checkpoint_path='', # not required, set to ''
-    train_optimizer=None, # not required, set to None
-    distiller_optimizer=None # not required, set to None
-    )
-```
-
-* Differences in yaml config
-
-An example yaml config is listed below, for more details, you can refer to [config_mkldnn_int8.yaml](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/contrib/slim/tests/quantization/config_mkldnn_int8.yaml) which is used in unit test.
-
-```yaml
-version: 1.0
-strategies:
-    mkldnn_post_training_strategy:
-        class: 'MKLDNNPostTrainingQuantStrategy' # required, class name of MKL-DNN INT8 Post-training quantization strategy
-        int8_model_save_path: 'OUTPUT_PATH' # required, int8 ProgramDesc with fp32 weights
-        fp32_model_path: 'MODEL_PATH' # required, fp32 ProgramDesc with fp32 weights
-        cpu_math_library_num_threads: 1 # required, The number of cpu math library threads
-compressor:
-    epoch: 0 # not required, set to 0
-    checkpoint_path: '' # not required, set to ''
-    strategies:
-        - mkldnn_post_training_strategy
-```
-
-## 2. How to run INT8 ProgramDesc with fp32 weights
-
-You can load INT8 ProgramDesc with fp32 weights by load_inference_model [API](https://github.com/PaddlePaddle/Paddle/blob/8b50ad80ff6934512d3959947ac1e71ea3fb9ea3/python/paddle/fluid/io.py#L991) and run INT8 inference similar as [FP32](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/eval.py "FP32").
-
-```python
-[infer_program, feed_dict, fetch_targets] = fluid.io.load_inference_model(model_path, exe)
-```
-
-## 3. Result
-
-We provide the results of accuracy measured on Intel(R) Xeon(R) Gold 6271.
-
->**I. Top-1 Accuracy on Intel(R) Xeon(R) Gold 6271**
-
->**Dataset: ILSVRC2012 Validation dataset**
-
-| Model        | FP32 Accuracy   | INT8 Accuracy   | Accuracy Diff(FP32-INT8)   |
-| :----------: | :-------------: | :------------:  | :--------------:           |
-| GoogleNet    |  70.50%         |  69.81%         |   0.69%                    |
-| MobileNet-V1 |  70.78%         |  70.42%         |   0.36%                    |
-| MobileNet-V2 |  71.90%         |  71.35%         |   0.55%                    |
-| ResNet-101   |  77.50%         |  77.42%         |   0.08%                    |
-| ResNet-50    |  76.63%         |  76.52%         |   0.11%                    |
-| VGG16        |  72.08%         |  72.03%         |   0.05%                    |
-| VGG19        |  72.57%         |  72.55%         |   0.02%                    |
-
-Notes:
-
-* MKL-DNN and MKL are required.
-
-## 4. How to reproduce the results
-
-Three steps to reproduce the above-mentioned accuracy results, and we take GoogleNet benchmark as an example:
-
-* ### Prepare dataset
-
-You can run the following commands to download and preprocess the ILSVRC2012 Validation dataset.
-
-```bash
-cd /PATH/TO/PADDLE
-python ./paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
-```
-
-Then the ILSVRC2012 Validation dataset will be preprocessed and saved by default in `~/.cache/paddle/dataset/int8/download/int8_full_val.bin`
-
-* ### Prepare model
-
-You can run the following commands to download GoogleNet model.
-
-```bash
-mkdir -p /PATH/TO/DOWNLOAD/MODEL/
-cd /PATH/TO/DOWNLOAD/MODEL/
-export MODEL_NAME=GoogleNet
-wget http://paddle-inference-dist.bj.bcebos.com/int8/${MODEL_NAME}_int8_model.tar.gz
-mkdir -p ${MODEL_NAME}
-tar -xvf ${MODEL_NAME}_int8_model.tar.gz -C ${MODEL_NAME}
-```
-
-To download and verify all the 7 models, you need to set `MODEL_NAME` to one of the following values in command line:
-
-```text
-MODEL_NAME=GoogleNet, mobilenetv1, mobilenet_v2, Res101, resnet50, VGG16, VGG19
-```
-
-* ### Commands to reproduce benchmark
-
-You can run `test_mkldnn_int8_quantization_strategy.py` with the following arguments to reproduce the accuracy result on GoogleNet.
-
-``` bash
-cd /PATH/TO/PADDLE/python/paddle/fluid/contrib/slim/tests/
-python ./test_mkldnn_int8_quantization_strategy.py --infer_model /PATH/TO/DOWNLOAD/MODEL/${MODEL_NAME}/model --infer_data ~/.cache/paddle/dataset/int8/download/int8_full_val.bin --warmup_batch_size 100 --batch_size 1
-```
-
-Notes:
-
-* The above commands will cost maybe several hours in the prediction stage (include int8 prediction and fp32 prediction) since there have 50000 pictures need to be predicted in `int8_full_val.bin`
-* Running the above command with environment variable `FLAGS_use_mkldnn=true` will make the FP32 part of the test running using MKL-DNN (the INT8 part uses MKL-DNN either way).
--- a/python/paddle/fluid/contrib/slim/tests/test_auto_pruning.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_auto_pruning.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import paddle
-import unittest
-import paddle.fluid as fluid
-from mobilenet import MobileNet
-from paddle.fluid.contrib.slim.core import Compressor
-from paddle.fluid.contrib.slim.graph import GraphWrapper
-
-
-class TestFilterPruning(unittest.TestCase):
-    def test_compression(self):
-        """
-        Model: mobilenet_v1
-        data: mnist
-        step1: Training one epoch
-        step2: pruning flops
-        step3: fine-tune one epoch
-        step4: check top1_acc.
-        """
-        if not fluid.core.is_compiled_with_cuda():
-            return
-        class_dim = 10
-        image_shape = [1, 28, 28]
-        image = fluid.layers.data(
-            name='image', shape=image_shape, dtype='float32')
-        image.stop_gradient = False
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        out = MobileNet("auto_pruning").net(input=image, class_dim=class_dim)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-        val_program = fluid.default_main_program().clone(for_test=False)
-
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-
-        optimizer = fluid.optimizer.Momentum(
-            momentum=0.9,
-            learning_rate=0.01,
-            regularization=fluid.regularizer.L2Decay(4e-5))
-
-        place = fluid.CUDAPlace(0)
-        exe = fluid.Executor(place)
-        exe.run(fluid.default_startup_program())
-
-        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
-
-        val_feed_list = [('img', image.name), ('label', label.name)]
-        val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
-                                                        acc_top5.name)]
-
-        train_reader = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=128)
-        train_feed_list = [('img', image.name), ('label', label.name)]
-        train_fetch_list = [('loss', avg_cost.name)]
-
-        com_pass = Compressor(
-            place,
-            fluid.global_scope(),
-            fluid.default_main_program(),
-            train_reader=train_reader,
-            train_feed_list=train_feed_list,
-            train_fetch_list=train_fetch_list,
-            eval_program=val_program,
-            eval_reader=val_reader,
-            eval_feed_list=val_feed_list,
-            eval_fetch_list=val_fetch_list,
-            train_optimizer=optimizer)
-        com_pass.config('./auto_pruning/compress.yaml')
-        eval_graph = com_pass.run()
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/slim/tests/test_compressor.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_compressor.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import paddle
-import unittest
-import os
-import numpy as np
-import paddle.fluid as fluid
-from paddle.fluid.contrib.slim.core import Compressor
-from paddle.fluid.contrib.slim.graph import GraphWrapper
-
-
-class TestCompressor(unittest.TestCase):
-    def test_eval_func(self):
-        class_dim = 10
-        image_shape = [1, 28, 28]
-        image = fluid.layers.data(
-            name='image', shape=image_shape, dtype='float32')
-        image.stop_gradient = False
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        out = fluid.layers.fc(input=image, size=class_dim)
-        out = fluid.layers.softmax(out)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        val_program = fluid.default_main_program().clone(for_test=False)
-
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-
-        optimizer = fluid.optimizer.Momentum(
-            momentum=0.9,
-            learning_rate=0.01,
-            regularization=fluid.regularizer.L2Decay(4e-5))
-
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
-        exe.run(fluid.default_startup_program())
-
-        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
-
-        train_reader = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=128)
-        train_feed_list = [('img', image.name), ('label', label.name)]
-        train_fetch_list = [('loss', avg_cost.name)]
-        eval_feed_list = [('img', image.name), ('label', label.name)]
-        eval_fetch_list = [('acc_top1', acc_top1.name)]
-
-        def eval_func(program, scope):
-            place = fluid.CPUPlace()
-            exe = fluid.Executor(place)
-            feeder = fluid.DataFeeder(
-                feed_list=[image.name, label.name],
-                place=place,
-                program=program)
-            results = []
-            for data in val_reader():
-                result = exe.run(program=program,
-                                 scope=scope,
-                                 fetch_list=[acc_top1.name],
-                                 feed=feeder.feed(data))
-                results.append(np.array(result))
-            result = np.mean(results)
-            return result
-
-        com_pass = Compressor(
-            place,
-            fluid.global_scope(),
-            fluid.default_main_program(),
-            train_reader=train_reader,
-            train_feed_list=train_feed_list,
-            train_fetch_list=train_fetch_list,
-            eval_program=val_program,
-            eval_feed_list=eval_feed_list,
-            eval_fetch_list=eval_fetch_list,
-            eval_func={"score": eval_func},
-            prune_infer_model=[[image.name], [out.name]],
-            train_optimizer=optimizer)
-        com_pass.config('./configs/compress.yaml')
-        com_pass.run()
-        self.assertTrue('score' in com_pass.context.eval_results)
-        self.assertTrue(float(com_pass.context.eval_results['score'][0]) > 0.9)
-        self.assertTrue(os.path.exists("./checkpoints/0/eval_model/__model__"))
-        self.assertTrue(
-            os.path.exists("./checkpoints/0/eval_model/__model__.infer"))
-        self.assertTrue(os.path.exists("./checkpoints/0/eval_model/__params__"))
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/slim/tests/test_factory.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_factory.py
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle.fluid.contrib.slim.core import ConfigFactory
-import unittest
-
-
-class TestFactory(unittest.TestCase):
-    def test_parse_pruning(self):
-        factory = ConfigFactory('./configs/filter_pruning.yaml')
-
-        pruner_1 = factory.instance('pruner_1')
-        self.assertEquals(pruner_1.pruning_axis['*'], 0)
-        self.assertEquals(pruner_1.criterions['*'], 'l1_norm')
-
-        strategy = factory.instance('sensitive_pruning_strategy')
-        pruner_1 = strategy.pruner
-        self.assertEquals(pruner_1.criterions['*'], 'l1_norm')
-
-        self.assertEquals(strategy.start_epoch, 0)
-        self.assertEquals(strategy.sensitivities_file,
-                          'mobilenet_acc_top1_sensitive.data')
-
-
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/slim/tests/test_filter_pruning.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_filter_pruning.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import paddle
-import unittest
-import paddle.fluid as fluid
-import numpy as np
-from mobilenet import MobileNet
-from paddle.fluid.contrib.slim.core import Compressor
-from paddle.fluid.contrib.slim.graph import GraphWrapper
-
-
-class TestFilterPruning(unittest.TestCase):
-    def test_compression(self):
-        """
-        Model: mobilenet_v1
-        data: mnist
-        step1: Training one epoch
-        step2: pruning flops
-        step3: fine-tune one epoch
-        step4: check top1_acc.
-        """
-        if not fluid.core.is_compiled_with_cuda():
-            return
-        class_dim = 10
-        image_shape = [1, 28, 28]
-        image = fluid.layers.data(
-            name='image', shape=image_shape, dtype='float32')
-        image.stop_gradient = False
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        out = MobileNet().net(input=image, class_dim=class_dim)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-        val_program = fluid.default_main_program().clone(for_test=False)
-
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-
-        optimizer = fluid.optimizer.Momentum(
-            momentum=0.9,
-            learning_rate=0.01,
-            regularization=fluid.regularizer.L2Decay(4e-5))
-
-        place = fluid.CUDAPlace(0)
-        exe = fluid.Executor(place)
-        exe.run(fluid.default_startup_program())
-
-        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
-
-        val_feed_list = [('img', image.name), ('label', label.name)]
-        val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
-                                                        acc_top5.name)]
-
-        train_reader = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=128)
-        train_feed_list = [('img', image.name), ('label', label.name)]
-        train_fetch_list = [('loss', avg_cost.name)]
-
-        com_pass = Compressor(
-            place,
-            fluid.global_scope(),
-            fluid.default_main_program(),
-            train_reader=train_reader,
-            train_feed_list=train_feed_list,
-            train_fetch_list=train_fetch_list,
-            eval_program=val_program,
-            eval_reader=val_reader,
-            eval_feed_list=val_feed_list,
-            eval_fetch_list=val_fetch_list,
-            train_optimizer=optimizer)
-        com_pass.config('./filter_pruning/compress.yaml')
-        eval_graph = com_pass.run()
-        self.assertTrue(
-            abs((com_pass.context.eval_results['acc_top1'][-1] - 0.969) / 0.969)
-            < 0.02)
-
-    def test_uniform_restore_from_checkpoint(self):
-        np.random.seed(0)
-        self.uniform_restore_from_checkpoint(
-            "./filter_pruning/uniform_restore_0.yaml")
-        acc_0 = self.uniform_restore_from_checkpoint(
-            "./filter_pruning/uniform_restore_1.yaml")
-        np.random.seed(0)
-        acc_1 = self.uniform_restore_from_checkpoint(
-            "./filter_pruning/uniform_restore.yaml")
-        self.assertTrue(abs((acc_0 - acc_1) / acc_1) < 0.001)
-
-    def uniform_restore_from_checkpoint(self, config_file):
-        """Run ``Compressor`` on a small classifier using ``config_file``.
-
-        Builds a conv2d -> fc -> softmax network over 1x28x28 inputs with 10
-        classes, hands the train and eval programs plus MNIST readers to
-        ``Compressor``, applies ``config_file`` and runs it on CPU.
-
-        Args:
-            config_file: path passed to ``Compressor.config``.
-
-        Returns:
-            The last entry of ``com_pass.context.eval_results['acc_top1']``.
-        """
-
-        class_dim = 10
-        image_shape = [1, 28, 28]
-
-        train_program = fluid.Program()
-        startup_program = fluid.Program()
-        # Both programs get the same fixed seed (presumably so repeated calls
-        # initialise identically -- the caller compares accuracies).
-        train_program.random_seed = 10
-        startup_program.random_seed = 10
-
-        with fluid.program_guard(train_program, startup_program):
-            with fluid.unique_name.guard():
-                image = fluid.layers.data(
-                    name='image', shape=image_shape, dtype='float32')
-                image.stop_gradient = False
-                label = fluid.layers.data(
-                    name='label', shape=[1], dtype='int64')
-                out = fluid.layers.conv2d(image, 4, 1)
-                out = fluid.layers.fc(out, size=class_dim)
-                out = fluid.layers.softmax(out)
-                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-                cost = fluid.layers.cross_entropy(input=out, label=label)
-                avg_cost = fluid.layers.mean(x=cost)
-        # NOTE(review): the eval program is cloned with for_test=False;
-        # confirm that a non-test clone is what evaluation should use.
-        val_program = train_program.clone(for_test=False)
-
-        # The optimizer is only constructed here; it is handed to Compressor
-        # below rather than applied to train_program in this method.
-        optimizer = fluid.optimizer.Momentum(
-            momentum=0.9,
-            learning_rate=0.01,
-            regularization=fluid.regularizer.L2Decay(4e-5))
-
-        # Each call uses its own Scope, so variables are not shared between
-        # successive calls through the scope created here.
-        place = fluid.CPUPlace()
-        scope = fluid.Scope()
-        exe = fluid.Executor(place)
-        exe.run(startup_program, scope=scope)
-
-        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
-
-        # Feed/fetch lists are (key, variable name) pairs.
-        val_feed_list = [('img', image.name), ('label', label.name)]
-        val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
-                                                        acc_top5.name)]
-
-        train_reader = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=128)
-        train_feed_list = [('img', image.name), ('label', label.name)]
-        train_fetch_list = [('loss', avg_cost.name)]
-
-        com_pass = Compressor(
-            place,
-            scope,
-            train_program,
-            train_reader=train_reader,
-            train_feed_list=train_feed_list,
-            train_fetch_list=train_fetch_list,
-            eval_program=val_program,
-            eval_reader=val_reader,
-            eval_feed_list=val_feed_list,
-            eval_fetch_list=val_fetch_list,
-            train_optimizer=optimizer)
-        com_pass.config(config_file)
-        # The value returned by run() is bound but not used afterwards.
-        eval_graph = com_pass.run()
-        return com_pass.context.eval_results['acc_top1'][-1]
-
-
-# Run this module's test cases when the file is executed as a script.
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_graph_wrapper.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-from __future__ import print_function
-import unittest
-import paddle.fluid as fluid
-import six
-import numpy as np
-from paddle.fluid.contrib.slim.graph import GraphWrapper
-from paddle.fluid import core
-import os
-os.environ['CPU_NUM'] = str(4)
-
-
-def residual_block(num):
-    """Build ``num`` stacked residual-style blocks and a mean loss.
-
-    Each block adds a 3x3 conv + batch_norm branch to a 1x1 conv + batch_norm
-    branch (both with 16 filters) and applies relu to the sum. The result goes
-    through a size-10 fc layer, cross_entropy against ``label`` and a mean.
-
-    Args:
-        num: number of blocks to stack.
-
-    Returns:
-        The tuple ``(data, label, loss)`` of the two input variables and the
-        mean loss variable.
-    """
-
-    def conv_bn_layer(input,
-                      ch_out,
-                      filter_size,
-                      stride,
-                      padding,
-                      act='relu',
-                      bias_attr=False):
-        # conv2d without activation, followed by batch_norm carrying `act`.
-        tmp = fluid.layers.conv2d(
-            input=input,
-            filter_size=filter_size,
-            num_filters=ch_out,
-            stride=stride,
-            padding=padding,
-            use_cudnn=False,
-            act=None,
-            bias_attr=bias_attr)
-        return fluid.layers.batch_norm(input=tmp, act=act)
-
-    data = fluid.layers.data(name='image', shape=[1, 8, 8], dtype='float32')
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    # NOTE(review): `stop_gradinet` looks like a misspelling of
-    # `stop_gradient`; as written it only assigns an attribute of that
-    # misspelled name. The tests in this file assert exact parameter, variable
-    # and op counts, so confirm those still hold before correcting it.
-    data.stop_gradinet = False
-    hidden = data
-    for _ in six.moves.xrange(num):
-        # Main branch: 3x3 conv with bias; shortcut branch: 1x1 conv, no bias.
-        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
-        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
-        hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
-    fc = fluid.layers.fc(input=hidden, size=10)
-
-    loss = fluid.layers.cross_entropy(input=fc, label=label)
-    loss = fluid.layers.mean(loss)
-    return data, label, loss
-
-
-class TestGraphWrapper(unittest.TestCase):
-    """Exercise GraphWrapper on a network of two residual blocks."""
-
-    @staticmethod
-    def _default_place():
-        """Return CUDAPlace(0) when built with CUDA, otherwise CPUPlace."""
-        if fluid.core.is_compiled_with_cuda():
-            return fluid.CUDAPlace(0)
-        return fluid.CPUPlace()
-
-    def build_program(self):
-        """Build the network and wrap it as ``eval_graph`` / ``train_graph``.
-
-        The evaluation program is cloned before ``opt.minimize`` is called,
-        so it holds only what ``residual_block`` created, while ``main`` also
-        receives whatever the optimizer appends. Sets ``self.loss``,
-        ``self.scope``, ``self.eval_graph`` and ``self.train_graph``.
-        """
-        place = self._default_place()
-        main = fluid.Program()
-        startup = fluid.Program()
-        with fluid.program_guard(main, startup):
-            image, label, self.loss = residual_block(2)
-            eval_program = main.clone()
-            opt = fluid.optimizer.SGD(learning_rate=0.001)
-            opt.minimize(self.loss)
-        self.scope = core.Scope()
-        exe = fluid.Executor(place)
-        exe.run(startup, scope=self.scope)
-        # Each wrapper gets its own dict objects; they are not shared.
-        self.eval_graph = GraphWrapper(
-            program=eval_program,
-            in_nodes={'image': image.name,
-                      'label': label.name},
-            out_nodes={'loss': self.loss.name})
-        self.train_graph = GraphWrapper(
-            program=main,
-            in_nodes={'image': image.name,
-                      'label': label.name},
-            out_nodes={'loss': self.loss.name})
-
-    def test_all_parameters(self):
-        """The training graph reports 24 parameters."""
-        self.build_program()
-        self.assertEqual(len(self.train_graph.all_parameters()), 24)
-
-    def test_all_vars(self):
-        """The training graph reports 91 variables."""
-        self.build_program()
-        # History of the expected count:
-        #   90 -> 94: activation inplace has been disabled in python side,
-        #             which may produce more variable in program_desc
-        #   94 -> 91: delete three useless RAW variables in Conv2D
-        self.assertEqual(len(self.train_graph.vars()), 91)
-
-    def test_numel_params(self):
-        """The training graph reports 13258 parameter elements."""
-        self.build_program()
-        self.assertEqual(self.train_graph.numel_params(), 13258)
-
-    def test_compile(self):
-        """The compiled training graph runs on one random batch of 16."""
-        self.build_program()
-        exe = fluid.Executor(self._default_place())
-        self.train_graph.compile()
-        exe.run(self.train_graph.compiled_graph,
-                scope=self.scope,
-                feed={
-                    'image':
-                    np.random.randint(0, 40, [16, 1, 8, 8]).astype('float32'),
-                    'label': np.random.randint(0, 10, [16, 1]).astype('int64')
-                })
-
-    def test_pre_and_next_ops(self):
-        """Every op appears among the pre_ops of each of its next_ops."""
-        self.build_program()
-        for op in self.train_graph.ops():
-            for next_op in self.train_graph.next_ops(op):
-                self.assertIn(op, self.train_graph.pre_ops(next_op))
-
-    def test_get_optimize_graph(self):
-        """The graph from get_optimize_graph matches train_graph's op count
-        and runs on one random batch of 16."""
-        self.build_program()
-        place = self._default_place()
-        opt = fluid.optimizer.SGD(learning_rate=0.001)
-        train_graph = self.eval_graph.get_optimize_graph(
-            opt, place, self.scope, no_grad_var_names=['image'])
-        self.assertEqual(len(self.train_graph.ops()), len(train_graph.ops()))
-        exe = fluid.Executor(place)
-        train_graph.compile()
-        image = np.random.randint(0, 225, [16, 1, 8, 8]).astype('float32')
-        label = np.random.randint(0, 10, [16, 1]).astype('int64')
-        exe.run(train_graph.compiled_graph,
-                scope=self.scope,
-                feed={'image': image,
-                      'label': label})
-
-    def test_get_optimize_graph_without_loss(self):
-        """With empty out_nodes, get_optimize_graph returns None."""
-        self.build_program()
-        self.eval_graph.out_nodes = {}
-        place = self._default_place()
-        opt = fluid.optimizer.SGD(learning_rate=0.001)
-        train_graph = self.eval_graph.get_optimize_graph(
-            opt, place, self.scope, no_grad_var_names=['image'])
-        self.assertIsNone(train_graph)
-
-    def test_flops(self):
-        """The training graph reports 354624 flops."""
-        self.build_program()
-        self.assertEqual(self.train_graph.flops(), 354624)
-
-    def test_merge(self):
-        """After merging eval_graph in, train_graph holds 72 ops."""
-        self.build_program()
-        self.train_graph.merge(self.eval_graph)
-        self.assertEqual(len(self.train_graph.ops()), 72)
-
-
-# Run this module's test cases when the file is executed as a script.
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/slim/tests/test_light_nas.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_light_nas.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-"""
-Test LightNAS.
-"""
-import sys
-import unittest
-import paddle.fluid as fluid
-from paddle.fluid.contrib.slim.core import Compressor
-sys.path.append("./light_nas")
-from light_nas_space import LightNASSpace
-
-
-class TestLightNAS(unittest.TestCase):
-    """
-    Test LightNAS.
-    """
-
-    # Config file that is rewritten in place and then given to Compressor.
-    _CONFIG_PATH = './light_nas/compress.yaml'
-
-    def _set_target_latency(self, value):
-        """Rewrite the config so its ``target_latency`` lines carry ``value``.
-
-        Every line containing ``target_latency`` is replaced by a fixed,
-        eight-space-indented ``target_latency: <value>`` line; all other
-        lines are written back unchanged. Files are handled with ``with`` so
-        they are closed even if reading or writing raises.
-        """
-        with open(self._CONFIG_PATH) as fid:
-            lines = [
-                '        target_latency: {}\n'.format(value)
-                if 'target_latency' in line else line for line in fid
-            ]
-        with open(self._CONFIG_PATH, 'w') as fid:
-            fid.writelines(lines)
-
-    def _run_compression(self):
-        """Build the search-space network and run Compressor on it.
-
-        Returns immediately, without running anything, when Paddle was not
-        compiled with CUDA.
-        """
-        if not fluid.core.is_compiled_with_cuda():
-            return
-
-        space = LightNASSpace()
-
-        (startup_prog, train_prog, test_prog, train_metrics, test_metrics,
-         train_reader, test_reader) = space.create_net()
-        # Only the train cost and the two test accuracies are fetched; the
-        # unpacking still checks the number of metrics returned.
-        train_cost, _, _, _ = train_metrics
-        _, test_acc1, test_acc5 = test_metrics
-
-        place = fluid.CUDAPlace(0)
-        exe = fluid.Executor(place)
-        exe.run(startup_prog)
-
-        val_fetch_list = [('acc_top1', test_acc1.name),
-                          ('acc_top5', test_acc5.name)]
-        train_fetch_list = [('loss', train_cost.name)]
-
-        com_pass = Compressor(
-            place,
-            fluid.global_scope(),
-            train_prog,
-            train_reader=train_reader,
-            train_feed_list=None,
-            train_fetch_list=train_fetch_list,
-            eval_program=test_prog,
-            eval_reader=test_reader,
-            eval_feed_list=None,
-            eval_fetch_list=val_fetch_list,
-            train_optimizer=None,
-            search_space=space)
-        com_pass.config(self._CONFIG_PATH)
-        com_pass.run()
-
-    def test_compression(self):
-        """
-        Test LightNAS.
-        """
-        # The config is updated before the CUDA check, so the file is
-        # rewritten even on builds where the run itself is skipped.
-        self._set_target_latency(0)
-        self._run_compression()
-
-    def test_compression_with_target_latency(self):
-        """
-        Test LightNAS with target_latency.
-        """
-        self._set_target_latency(1)
-        self._run_compression()
-
-
-# Run this module's test cases when the file is executed as a script.
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import unittest
-import os
-import sys
-import argparse
-import shutil
-import logging
-import struct
-import six
-import numpy as np
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.framework import IrGraph
-from paddle.fluid import core
-from paddle.fluid.contrib.slim.core import Compressor
-from paddle.fluid.log_helper import get_logger
-
-_logger = get_logger(
-    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
-
-
-def parse_args():
-    """Parse this test's own command-line options.
-
-    Returns:
-        A pair ``(test_args, argv)``. ``test_args`` is the object holding the
-        parsed option values; because ``unittest`` is passed as the argparse
-        namespace, that object is the ``unittest`` module itself. ``argv`` is
-        ``sys.argv[:1]`` followed by every argument argparse did not
-        recognise, suitable for ``unittest.main(argv=...)``.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--batch_size', type=int, default=1, help='batch size')
-    parser.add_argument(
-        '--infer_model',
-        type=str,
-        default='',
-        help='infer_model is used to load an original fp32 ProgramDesc with fp32 weights'
-    )
-    parser.add_argument('--infer_data', type=str, default='', help='data file')
-    # The help text previously named `infer_data`, which is a different
-    # option; it now names the option it belongs to.
-    parser.add_argument(
-        '--int8_model_save_path',
-        type=str,
-        default='./output',
-        help='int8_model_save_path is used to save an int8 ProgramDesc with fp32 weights'
-    )
-    parser.add_argument(
-        '--warmup_batch_size',
-        type=int,
-        default=100,
-        help='batch size for quantization warmup')
-    parser.add_argument(
-        '--accuracy_diff_threshold',
-        type=float,
-        default=0.01,
-        help='accepted accuracy drop threshold.')
-
-    # parse_known_args leaves unrecognised arguments for unittest to handle.
-    test_args, args = parser.parse_known_args(namespace=unittest)
-
-    return test_args, sys.argv[:1] + args
-
-
-class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
-    """
-    Test API of Post Training quantization strategy for int8 with MKL-DNN.
-    """
-
-    def _reader_creator(self, data_file='data.bin', cycle=False):
-        """Return a generator function yielding ``(image, label)`` samples.
-
-        File layout as read here: one 8-byte integer sample count, then that
-        many images of 3*224*224 4-byte floats, then that many 8-byte integer
-        labels.
-
-        Args:
-            data_file: path of the binary data file.
-            cycle: when true, start again from the first sample after the
-                last one instead of stopping.
-
-        Returns:
-            A no-argument function; iterating its result yields a NumPy array
-            of shape (3, 224, 224) and an ``int`` label per sample.
-        """
-
-        def reader():
-            with open(data_file, 'rb') as fp:
-                num = struct.unpack('q', fp.read(8))[0]
-                imgs_offset = 8
-                img_ch = 3
-                img_w = 224
-                img_h = 224
-                img_pixel_size = 4
-                img_numel = img_ch * img_h * img_w
-                img_size = img_numel * img_pixel_size
-                label_size = 8
-                labels_offset = imgs_offset + num * img_size
-                img_format = '{}f'.format(img_numel)
-                step = 0
-
-                while step < num:
-                    fp.seek(imgs_offset + img_size * step)
-                    img = np.array(
-                        struct.unpack_from(img_format, fp.read(img_size)))
-                    img.shape = (img_ch, img_w, img_h)
-                    fp.seek(labels_offset + label_size * step)
-                    label = struct.unpack('q', fp.read(label_size))[0]
-                    yield img, int(label)
-                    step += 1
-                    if cycle and step == num:
-                        step = 0
-
-        return reader
-
-    def _update_config_file(self, fp32_model_path, output_path):
-        """Copy the config template and fill in its two path placeholders.
-
-        Args:
-            fp32_model_path: replaces every ``MODEL_PATH`` in the template.
-            output_path: replaces every ``OUTPUT_PATH`` in the template.
-
-        Returns:
-            Path of the filled-in copy, ``./quantization/temp.yaml``.
-        """
-        config_path = './quantization/config_mkldnn_int8.yaml'
-        new_config_path = './quantization/temp.yaml'
-        shutil.copy(config_path, new_config_path)
-
-        with open(new_config_path, 'r') as fp:
-            data = fp.read()
-        data = data.replace('MODEL_PATH', fp32_model_path)
-        data = data.replace('OUTPUT_PATH', output_path)
-        with open(new_config_path, 'w') as fp:
-            fp.write(data)
-
-        return new_config_path
-
-    def _transform_depthwise_conv(self, graph):
-        '''
-        Transform depthwise_conv2d into conv2d, with MKL-DNN only
-
-        For every depthwise_conv2d op a conv2d op with the same attributes,
-        inputs and output is created and linked in, and the original op node
-        is removed. Returns the same graph object.
-        '''
-        for op_node in graph.all_op_nodes():
-            if op_node.name() != 'depthwise_conv2d':
-                continue
-
-            input_var_node = graph._find_node_by_name(
-                op_node.inputs, op_node.input("Input")[0])
-            weight_var_node = graph._find_node_by_name(
-                op_node.inputs, op_node.input("Filter")[0])
-            output_var_node = graph._find_node_by_name(
-                graph.all_var_nodes(), op_node.output("Output")[0])
-            attrs = {
-                attr_name: op_node.op().attr(attr_name)
-                for attr_name in op_node.op().attr_names()
-            }
-
-            conv_op_node = graph.create_op_node(
-                op_type='conv2d',
-                attrs=attrs,
-                inputs={
-                    'Input': input_var_node,
-                    'Filter': weight_var_node
-                },
-                outputs={'Output': output_var_node})
-
-            graph.link_to(input_var_node, conv_op_node)
-            graph.link_to(weight_var_node, conv_op_node)
-            graph.link_to(conv_op_node, output_var_node)
-            graph.safe_remove_nodes(op_node)
-
-        return graph
-
-    def _predict(self, test_reader=None, model_path=None):
-        """Run the saved inference model over ``test_reader`` on CPU.
-
-        Args:
-            test_reader: callable returning an iterable of batches, each a
-                sequence of ``(image, label)`` samples.
-            model_path: directory of the saved inference model.
-
-        Returns:
-            ``(top1, top5)``: the second and third fetched outputs, each
-            weighted by batch size and divided by the total sample count.
-        """
-        place = fluid.CPUPlace()
-        exe = fluid.Executor(place)
-        inference_scope = fluid.executor.global_scope()
-        with fluid.scope_guard(inference_scope):
-            # A '__model__' file selects the default layout; otherwise the
-            # 'model' / 'params' file names are given explicitly.
-            if os.path.exists(os.path.join(model_path, '__model__')):
-                [inference_program, feed_target_names,
-                 fetch_targets] = fluid.io.load_inference_model(model_path, exe)
-            else:
-                [inference_program, feed_target_names,
-                 fetch_targets] = fluid.io.load_inference_model(
-                     model_path, exe, 'model', 'params')
-
-            use_mkldnn = fluid.core.globals()["FLAGS_use_mkldnn"]
-            if use_mkldnn:
-                graph = IrGraph(
-                    core.Graph(inference_program.desc), for_test=True)
-                graph = self._transform_depthwise_conv(graph)
-                inference_program = graph.to_program()
-
-            dshape = [3, 224, 224]
-            top1 = 0.0
-            top5 = 0.0
-            total_samples = 0
-            for batch_id, data in enumerate(test_reader()):
-                # A list comprehension builds the same list on Python 2 and 3.
-                images = [x[0].reshape(dshape) for x in data]
-                images = np.array(images).astype('float32')
-                labels = np.array([x[1] for x in data]).astype("int64")
-                labels = labels.reshape([-1, 1])
-                # Thread count is raised for the run and reset to 1 after it.
-                fluid.core.set_num_threads(int(os.environ['CPU_NUM_THREADS']))
-                out = exe.run(inference_program,
-                              feed={
-                                  feed_target_names[0]: images,
-                                  feed_target_names[1]: labels
-                              },
-                              fetch_list=fetch_targets)
-                fluid.core.set_num_threads(1)
-                top1 += np.sum(out[1]) * len(data)
-                top5 += np.sum(out[2]) * len(data)
-                total_samples += len(data)
-                if (batch_id + 1) % 100 == 0:
-                    _logger.info('{} images have been predicted'.format(
-                        total_samples))
-            return top1 / total_samples, top5 / total_samples
-
-    def _warmup(self, reader=None, config_path=''):
-        """Run Compressor with only an eval reader and ``config_path`` set."""
-        com_pass = Compressor(
-            place=None,
-            scope=None,
-            train_program=None,
-            train_reader=None,
-            train_feed_list=[],
-            train_fetch_list=[],
-            eval_program=None,
-            eval_reader=reader,
-            eval_feed_list=[],
-            eval_fetch_list=[],
-            teacher_programs=[],
-            checkpoint_path='',
-            train_optimizer=None,
-            distiller_optimizer=None)
-        com_pass.config(config_path)
-        com_pass.run()
-
-    def _compare_accuracy(self, fp32_acc1, int8_acc1, threshold):
-        """Log both accuracies and require the INT8 drop to be acceptable.
-
-        Fails unless both accuracies are positive and
-        ``fp32_acc1 - int8_acc1 <= threshold``. TestCase assertions are used
-        so the checks still run when Python is started with ``-O``.
-        """
-        _logger.info('--- Accuracy summary ---')
-        _logger.info(
-            'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)'
-            .format(threshold))
-        _logger.info('FP32: avg top1 accuracy: {0:.4f}'.format(fp32_acc1))
-        _logger.info('INT8: avg top1 accuracy: {0:.4f}'.format(int8_acc1))
-        self.assertGreater(fp32_acc1, 0.0)
-        self.assertGreater(int8_acc1, 0.0)
-        self.assertLessEqual(fp32_acc1 - int8_acc1, threshold)
-
-    def test_compression(self):
-        """Quantize via a warmup run, then compare INT8 and FP32 top-1.
-
-        Returns immediately when Paddle was not compiled with MKL-DNN. All
-        options are read from the module-level ``test_case_args``.
-        """
-        if not fluid.core.is_compiled_with_mkldnn():
-            return
-
-        int8_model_path = test_case_args.int8_model_save_path
-        data_path = test_case_args.infer_data
-        fp32_model_path = test_case_args.infer_model
-        batch_size = test_case_args.batch_size
-
-        warmup_batch_size = test_case_args.warmup_batch_size
-        accuracy_diff_threshold = test_case_args.accuracy_diff_threshold
-
-        _logger.info(
-            'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'
-            .format(batch_size, warmup_batch_size))
-
-        #warmup dataset, only use the first batch data
-        warmup_reader = paddle.batch(
-            self._reader_creator(data_path, False),
-            batch_size=warmup_batch_size)
-        config_path = self._update_config_file(fp32_model_path, int8_model_path)
-        self._warmup(warmup_reader, config_path)
-
-        _logger.info('--- INT8 prediction start ---')
-        val_reader = paddle.batch(
-            self._reader_creator(data_path, False), batch_size=batch_size)
-        int8_model_result = self._predict(val_reader, int8_model_path)
-        _logger.info('--- FP32 prediction start ---')
-        val_reader = paddle.batch(
-            self._reader_creator(data_path, False), batch_size=batch_size)
-        fp32_model_result = self._predict(val_reader, fp32_model_path)
-
-        self._compare_accuracy(fp32_model_result[0], int8_model_result[0],
-                               accuracy_diff_threshold)
-
-
-# Script entry point: split the command line into this test's own options
-# and the arguments left over for unittest.
-if __name__ == '__main__':
-    # `global` has no effect at module level; test_case_args becomes a module
-    # global through the assignment below and is read by test_compression.
-    # It is only assigned on this path, i.e. when the file is run directly.
-    global test_case_args
-    test_case_args, remaining_args = parse_args()
-    unittest.main(argv=remaining_args)
--- a/python/paddle/fluid/contrib/slim/tests/test_quantization_strategy.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_strategy.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import paddle
-import unittest
-import paddle.fluid as fluid
-from mobilenet import MobileNet
-from paddle.fluid.contrib.slim.core import Compressor
-from paddle.fluid.contrib.slim.graph import GraphWrapper
-
-
-class TestQuantizationStrategy(unittest.TestCase):
-    """
-    Quantization strategy checks driven through Compressor.
-    """
-
-    def test_compression(self):
-        """Run the quantization flow once per config file, in order."""
-        for config in ("./quantization/compress.yaml",
-                       "./quantization/compress_1.yaml"):
-            self.quan(config)
-
-    def quan(self, config_file):
-        """Build a MobileNet classifier and run Compressor with config_file.
-
-        Does nothing when Paddle was not compiled with CUDA.
-        """
-        if not fluid.core.is_compiled_with_cuda():
-            return
-
-        num_classes = 10
-        input_shape = [1, 28, 28]
-
-        main_prog = fluid.Program()
-        init_prog = fluid.Program()
-
-        with fluid.program_guard(main_prog, init_prog), \
-                fluid.unique_name.guard():
-            image = fluid.layers.data(
-                name='image', shape=input_shape, dtype='float32')
-            image.stop_gradient = False
-            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-            net = MobileNet(name='quan')
-            out = net.net(input=image, class_dim=num_classes)
-            print("out: {}".format(out.name))
-            top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-            top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-            cost = fluid.layers.cross_entropy(input=out, label=label)
-            mean_cost = fluid.layers.mean(x=cost)
-
-        eval_prog = main_prog.clone(for_test=False)
-
-        weight_decay = fluid.regularizer.L2Decay(4e-5)
-        momentum_opt = fluid.optimizer.Momentum(
-            momentum=0.9, learning_rate=0.01, regularization=weight_decay)
-
-        scope = fluid.Scope()
-        place = fluid.CUDAPlace(0)
-        fluid.Executor(place).run(init_prog, scope=scope)
-
-        # Evaluation side: reader, then (key, variable name) feed/fetch pairs.
-        eval_batches = paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=128)
-        eval_feeds = [('img', image.name), ('label', label.name)]
-        eval_fetches = [('acc_top1', top1.name), ('acc_top5', top5.name)]
-
-        # Training side, built the same way.
-        train_batches = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=128)
-        train_feeds = [('img', image.name), ('label', label.name)]
-        train_fetches = [('loss', mean_cost.name)]
-
-        compressor = Compressor(
-            place,
-            scope,
-            main_prog,
-            train_reader=train_batches,
-            train_feed_list=train_feeds,
-            train_fetch_list=train_fetches,
-            eval_program=eval_prog,
-            eval_reader=eval_batches,
-            eval_feed_list=eval_feeds,
-            eval_fetch_list=eval_fetches,
-            train_optimizer=momentum_opt)
-        compressor.config(config_file)
-        compressor.run()
-
-
-# Run this module's test cases when the file is executed as a script.
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/slim/tests/test_reader.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_reader.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import os
-import paddle
-import unittest
-import paddle.fluid as fluid
-from mobilenet import MobileNet
-from paddle.fluid.contrib.slim.core import Compressor
-from paddle.fluid.contrib.slim.graph import GraphWrapper
-
-
-class TestReader(unittest.TestCase):
-    """
-    Run the quantization flow with overridable train/eval readers.
-
-    This base class defines no ``test_*`` method of its own; ``quan`` is the
-    shared driver and the ``set_*`` methods are the hooks it calls.
-    """
-
-    def set_train_reader(self, image, label, place):
-        """Return the training reader: MNIST train data in batches of 128."""
-        train_reader = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=128)
-        return train_reader
-
-    def set_val_reader(self, image, label, place):
-        """Return the evaluation reader: MNIST test data in batches of 128."""
-        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
-        return val_reader
-
-    def set_feed_list(self, image, label):
-        """Return a new list of (key, variable name) feed pairs."""
-        return [('img', image.name), ('label', label.name)]
-
-    def quan(self, config_file):
-        """Build a MobileNet classifier and run Compressor with config_file.
-
-        Does nothing when Paddle was not compiled with CUDA. Readers and feed
-        lists come from the ``set_*`` hooks.
-        """
-        if not fluid.core.is_compiled_with_cuda():
-            return
-        class_dim = 10
-        image_shape = [1, 28, 28]
-
-        train_program = fluid.Program()
-        startup_program = fluid.Program()
-
-        with fluid.program_guard(train_program, startup_program):
-            with fluid.unique_name.guard():
-                image = fluid.layers.data(
-                    name='image', shape=image_shape, dtype='float32')
-                image.stop_gradient = False
-                label = fluid.layers.data(
-                    name='label', shape=[1], dtype='int64')
-                out = MobileNet(name='quan').net(input=image,
-                                                 class_dim=class_dim)
-                print("out: {}".format(out.name))
-                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-                cost = fluid.layers.cross_entropy(input=out, label=label)
-                avg_cost = fluid.layers.mean(x=cost)
-        optimizer = fluid.optimizer.Momentum(
-            momentum=0.9,
-            learning_rate=0.01,
-            regularization=fluid.regularizer.L2Decay(4e-5))
-
-        # The eval program is a clone of the train program; an empty Program
-        # that used to be assigned to this name earlier was never used.
-        val_program = train_program.clone(for_test=False)
-
-        place = fluid.CUDAPlace(0)
-        exe = fluid.Executor(place)
-        exe.run(startup_program)
-
-        val_reader = self.set_val_reader(image, label, place)
-
-        val_feed_list = self.set_feed_list(image, label)
-        val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
-                                                        acc_top5.name)]
-
-        train_reader = self.set_train_reader(image, label, place)
-        train_feed_list = self.set_feed_list(image, label)
-        train_fetch_list = [('loss', avg_cost.name)]
-
-        com_pass = Compressor(
-            place,
-            fluid.global_scope(),
-            train_program,
-            train_reader=train_reader,
-            train_feed_list=train_feed_list,
-            train_fetch_list=train_fetch_list,
-            eval_program=val_program,
-            eval_reader=val_reader,
-            eval_feed_list=val_feed_list,
-            eval_fetch_list=val_fetch_list,
-            train_optimizer=optimizer)
-        com_pass.config(config_file)
-        com_pass.run()
-
-
-class TestReader1(TestReader):
-    """TestReader variant whose readers are iterable DataLoaders."""
-
-    @staticmethod
-    def _make_loader(dataset_fn, image, label, place):
-        """Create a DataLoader over [image, label] fed by ``dataset_fn()``.
-
-        ``dataset_fn`` is called only after the loader has been created.
-        """
-        data_loader = fluid.io.DataLoader.from_generator(
-            feed_list=[image, label], capacity=16, iterable=True)
-        data_loader.set_sample_generator(
-            dataset_fn(), batch_size=128, places=place)
-        return data_loader
-
-    def set_train_reader(self, image, label, place):
-        """Return a DataLoader over the MNIST training set."""
-        return self._make_loader(paddle.dataset.mnist.train, image, label,
-                                 place)
-
-    def set_val_reader(self, image, label, place):
-        """Return a DataLoader over the MNIST test set."""
-        return self._make_loader(paddle.dataset.mnist.test, image, label,
-                                 place)
-
-    def test_compression(self):
-        """Run the shared quantization driver with compress_2.yaml."""
-        self.quan("./quantization/compress_2.yaml")
-
-
-# Run this module's test cases when the file is executed as a script.
-if __name__ == '__main__':
-    unittest.main()
--- a/python/paddle/fluid/contrib/slim/tests/test_slim_distillation_strategy.py
+++ b/python/paddle/fluid/contrib/slim/tests/test_slim_distillation_strategy.py
-#   copyright (c) 2019 paddlepaddle authors. all rights reserved.
-#
-# licensed under the apache license, version 2.0 (the "license");
-# you may not use this file except in compliance with the license.
-# you may obtain a copy of the license at
-#
-#     http://www.apache.org/licenses/license-2.0
-#
-# unless required by applicable law or agreed to in writing, software
-# distributed under the license is distributed on an "as is" basis,
-# without warranties or conditions of any kind, either express or implied.
-# see the license for the specific language governing permissions and
-# limitations under the license.
-
-import paddle
-import unittest
-import paddle.fluid as fluid
-from mobilenet import MobileNet
-from paddle.fluid.contrib.slim.core import Compressor
-from paddle.fluid.contrib.slim.graph import GraphWrapper
-
-
-class TestDistillationStrategy(unittest.TestCase):
-    """
-    Test API of distillation strategy.
-    """
-
-    def test_compression(self):
-        """Run Compressor with a student and a teacher MobileNet.
-
-        The student is built in the default main program, the teacher in its
-        own program; both are passed to Compressor together with
-        './distillation/compress.yaml'. Returns immediately when Paddle was
-        not compiled with CUDA.
-        """
-        if not fluid.core.is_compiled_with_cuda():
-            return
-        class_dim = 10
-        image_shape = [1, 28, 28]
-        image = fluid.layers.data(
-            name='image', shape=image_shape, dtype='float32')
-        image.stop_gradient = False
-        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        out = MobileNet(name="student").net(input=image, class_dim=class_dim)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-        # Cloned here, before the loss layers below are added, so the eval
-        # program contains the network and accuracy ops but not the loss.
-        val_program = fluid.default_main_program().clone(for_test=False)
-
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-
-        # Learning rate steps through 0.01, 0.001, 0.0001 at boundaries 5, 10.
-        optimizer = fluid.optimizer.Momentum(
-            momentum=0.9,
-            learning_rate=fluid.layers.piecewise_decay(
-                boundaries=[5, 10], values=[0.01, 0.001, 0.0001]),
-            regularization=fluid.regularizer.L2Decay(4e-5))
-
-        place = fluid.CUDAPlace(0)
-        exe = fluid.Executor(place)
-        exe.run(fluid.default_startup_program())
-
-        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
-
-        # Feed/fetch lists are (key, variable name) pairs.
-        val_feed_list = [('img', image.name), ('label', label.name)]
-        val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5',
-                                                        acc_top5.name)]
-
-        train_reader = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=128)
-        train_feed_list = [('img', image.name), ('label', label.name)]
-        train_fetch_list = [('loss', avg_cost.name)]
-
-        # define teacher program
-        teacher_program = fluid.Program()
-        startup_program = fluid.Program()
-        with fluid.program_guard(teacher_program, startup_program):
-            # The teacher takes a clone of the student's input variable,
-            # created inside the teacher program's global block.
-            img = teacher_program.global_block()._clone_variable(
-                image, force_persistable=False)
-            # `predict` is not referenced again in this method.
-            predict = MobileNet(name="teacher").net(input=img,
-                                                    class_dim=class_dim)
-
-        # Run the teacher's own startup program (the student's was run above).
-        exe.run(startup_program)
-
-        # The same optimizer object is passed as both train_optimizer and
-        # distiller_optimizer.
-        com_pass = Compressor(
-            place,
-            fluid.global_scope(),
-            fluid.default_main_program(),
-            train_reader=train_reader,
-            train_feed_list=train_feed_list,
-            train_fetch_list=train_fetch_list,
-            eval_program=val_program,
-            eval_reader=val_reader,
-            eval_feed_list=val_feed_list,
-            eval_fetch_list=val_fetch_list,
-            teacher_programs=[teacher_program.clone(for_test=True)],
-            train_optimizer=optimizer,
-            distiller_optimizer=optimizer)
-        com_pass.config('./distillation/compress.yaml')
-        # The value returned by run() is bound but not used afterwards.
-        eval_graph = com_pass.run()
-
-
-# Run this module's test cases when the file is executed as a script.
-if __name__ == '__main__':
-    unittest.main()
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -166,14 +166,8 @@ packages=['paddle',
          'paddle.fluid.contrib.quantize',
          'paddle.fluid.contrib.reader',
          'paddle.fluid.contrib.slim',
-          'paddle.fluid.contrib.slim.core',
-          'paddle.fluid.contrib.slim.graph',
-          'paddle.fluid.contrib.slim.prune',
          'paddle.fluid.contrib.slim.quantization',
          'paddle.fluid.contrib.slim.quantization.imperative',
-          'paddle.fluid.contrib.slim.distillation',
-          'paddle.fluid.contrib.slim.nas',
-          'paddle.fluid.contrib.slim.searcher',
          'paddle.fluid.contrib.utils',
          'paddle.fluid.contrib.extend_optimizer',
          'paddle.fluid.contrib.mixed_precision',