未验证 提交 2131559d 编写于 作者: B Bai Yifan 提交者: GitHub

Remove slim from paddle framework (#25666)

* Remove slim from paddle framework
test=develop
Co-authored-by: Nwanghaoshuang <wanghaoshuang@baidu.com>
上级 bca30316
......@@ -25,7 +25,6 @@ from .quantize import *
from . import reader
from .reader import *
from . import slim
from .slim import *
from . import utils
from .utils import *
from . import extend_optimizer
......@@ -43,7 +42,6 @@ __all__ += memory_usage_calc.__all__
__all__ += op_frequence.__all__
__all__ += quantize.__all__
__all__ += reader.__all__
__all__ += slim.__all__
__all__ += utils.__all__
__all__ += extend_optimizer.__all__
__all__ += ['mixed_precision']
......
......@@ -11,6 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Pull every public name of the `core` sub-package into this package namespace.
from .core import *
# Only `Compressor` is advertised as the public API of this package.
__all__ = ['Compressor', ]
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Import each submodule both as a module (to read its __all__) and via star
# import (to re-export its public names from this package).
from . import config
from .config import *
from . import compressor
from .compressor import *
from . import strategy
from .strategy import *
# The package's public API is the concatenation of the submodules' public names.
__all__ = config.__all__ + compressor.__all__ + strategy.__all__
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ....core import CPUPlace, EOFException
from .... import compiler
from ....framework import Variable
from .... import io
from .... import profiler
from .... import scope_guard
from ....data_feeder import DataFeeder
from ....log_helper import get_logger
from ....reader import DataLoaderBase
from ..graph import *
from .config import ConfigFactory
import numpy as np
from collections import Iterable
import time
import os
import logging
import sys
import pickle
import functools
import traceback
# Public names exported by this module.
__all__ = ['Context', 'Compressor']
# Module-level logger at INFO level; records look like "<time>-<level>: <message>".
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def cached_reader(reader, sampled_rate, cache_path, cached_id):
    """
    Sample partial data from reader and cache them into local file system.

    When the returned generator function is iterated and the cache directory
    does not exist yet, batches are sampled from ``reader``; every sampled
    batch is saved as ``batch<i>.npy`` and listed in an index file named
    ``list``. When the cache directory already exists, the batches listed in
    the index file are replayed and ``reader`` is not touched.

    Args:
        reader: Iterative data source. It is called without arguments and
            must return an iterable of batches.
        sampled_rate(float): The sampled rate used to sample partial data.
            A batch is kept when a uniform random number is below this rate;
            the first batch is always kept.
        cache_path(str): The root path to cache the sampled data.
        cached_id(int): The id of dataset sampled. It seeds numpy's global
            random generator and names the cache sub-directory, so evaluations
            with same cached_id use the same sampled dataset.

    Returns:
        function: A generator function that yields the sampled batches.
    """
    np.random.seed(cached_id)
    cache_path = os.path.join(cache_path, str(cached_id))
    _logger.debug('read data from: {}'.format(cache_path))
    list_path = os.path.join(cache_path, "list")

    def s_reader():
        if os.path.isdir(cache_path):
            # Replay the cached batches in the order they were recorded.
            with open(list_path) as list_file:
                for file_name in list_file:
                    yield np.load(
                        os.path.join(cache_path, file_name.strip()),
                        allow_pickle=True)
        else:
            os.makedirs(cache_path)
            # The context manager guarantees the index file is flushed and
            # closed, even if the consumer abandons the generator early.
            with open(list_path, 'w') as list_file:
                batch = 0
                for data in reader():
                    if batch == 0 or (np.random.uniform() < sampled_rate):
                        batch_name = 'batch' + str(batch)
                        np.save(os.path.join(cache_path, batch_name), data)
                        list_file.write(batch_name + '.npy\n')
                        batch += 1
                        yield data

    return s_reader
class Context(object):
    """
    The context in the process of compression.

    It bundles the graphs, readers, optimizers and bookkeeping values
    (epoch/batch counters, evaluation history and a free-form key-value
    store) that are passed to the strategy callbacks.
    """

    def __init__(self,
                 place,
                 scope,
                 train_graph=None,
                 train_reader=None,
                 eval_graph=None,
                 eval_reader=None,
                 teacher_graphs=None,
                 train_optimizer=None,
                 distiller_optimizer=None,
                 search_space=None):
        """
        Args:
            place: The device place where the compression job running.
            scope: The scope used in compression job.
            train_graph: The graph with loss as output node.
            train_reader: The data reader used for training.
            eval_graph: The graph used for evaluation.
            eval_reader: The data reader used for evaluation.
            teacher_graphs: The teacher graphs used in distillation strategies.
            train_optimizer: The optimizer used to append backward ops and
                             optimization ops into train_graph.
            distiller_optimizer: The optimizer used by distillation strategies.
            search_space: The search space object; it is only stored here.
        """
        # The total number of epoches to be trained.
        self.epoch = 0
        # Current epoch
        self.epoch_id = 0
        # Current batch
        self.batch_id = 0
        # Free-form storage shared between strategies, see put()/get().
        self.k_v = {}
        self.place = place
        self.scope = scope
        self.train_graph = train_graph
        self.train_reader = train_reader
        self.eval_graph = eval_graph
        self.eval_reader = eval_reader
        self.executor = None
        self.teacher_graphs = teacher_graphs
        self.train_optimizer = train_optimizer
        self.distiller_optimizer = distiller_optimizer
        self.optimize_graph = None
        # Root directory used by run_eval_graph() to cache sampled eval data.
        self.cache_path = './eval_cache'
        # Maps a metric name to the list of values recorded so far.
        self.eval_results = {}
        self.skip_training = False
        self.search_space = search_space

    def to_file(self, file_name):
        """
        Save the context into file.

        Only ``epoch_id`` and ``eval_results`` are persisted.

        Args:
            file_name(str): The path of the file to write.
        """
        data = {'epoch_id': self.epoch_id, 'eval_results': self.eval_results}
        with open(file_name, 'wb') as context_file:
            pickle.dump(data, context_file)

    def from_file(self, file_name):
        """
        Load the context from file.

        Args:
            file_name(str): The path of a file written by to_file().
        """
        # NOTE(security): pickle can execute arbitrary code while loading;
        # only load context files produced by a trusted run.
        with open(file_name, 'rb') as context_file:
            if sys.version_info < (3, 0):
                data = pickle.load(context_file)
            else:
                data = pickle.load(context_file, encoding='bytes')
        self.epoch_id = data['epoch_id']
        self.eval_results = data['eval_results']

    @staticmethod
    def _has_converged(previous, current, delta):
        """
        Tell whether the change from ``previous`` to ``current`` is below ``delta``.

        The change is measured relative to the magnitude of ``previous``. When
        ``previous`` is zero a relative change is undefined, so the absolute
        change is compared against ``delta`` instead.
        """
        change = abs(current - previous)
        scale = abs(previous)
        if scale == 0:
            return change < delta
        return change / scale < delta

    def eval_converged(self, metric_name, delta=0.001):
        """
        Check whether the training has been converged.

        Args:
            metric_name(str): The metric used to check convergence.
            delta(float): '|metric[k] - metric[k-1]| / |metric[k-1]| < delta'
                          means that the training has been converged.

        Returns:
            bool: True means the training has been converged. False is
                  returned when fewer than two values have been recorded.
        """
        # TODO(wanghaoshuang@baidu.com): enhence this method.
        history = self.eval_results.get(metric_name, [])
        if len(history) < 2:
            return False
        results = history[-2:]
        _logger.info('Latest evaluations: {}'.format(results))
        return self._has_converged(results[0], results[1], delta)

    def run_eval_graph(self, sampled_rate=None, cached_id=0):
        """
        Evaluate the current mode in context.

        Args:
            sampled_rate(float): The sampled rate used to sample partial data
                for evaluation. None means using all data in eval_reader. default: None.
            cached_id(int): The id of dataset sampled. Evaluations with same
                cached_id use the same sampled dataset. default: 0.

        Returns:
            tuple: The per-output results averaged over all batches, and the
                   keys of the evaluation graph's output nodes.
        """
        _logger.info('Running evaluation')
        assert self.eval_graph is not None
        assert self.eval_reader is not None
        eval_graph = self.eval_graph.clone(for_test=True)
        executor = SlimGraphExecutor(self.place)
        results = []

        def record(fetched):
            # Reduce every fetched output to its mean; log every 20th batch.
            batch_result = [np.mean(r) for r in fetched]
            if len(results) % 20 == 0:
                _logger.info("batch-{}; {}={}".format(
                    len(results), eval_graph.out_nodes.keys(), batch_result))
            results.append(batch_result)

        reader = self.eval_reader
        if sampled_rate:
            assert (not isinstance(reader, Variable))
            assert (sampled_rate > 0)
            assert (self.cache_path is not None)
            _logger.info('sampled_rate: {}; cached_id: {}'.format(sampled_rate,
                                                                  cached_id))
            reader = cached_reader(reader, sampled_rate, self.cache_path,
                                   cached_id)

        if isinstance(reader, Variable) or (
                isinstance(reader, DataLoaderBase) and (not reader.iterable)):
            # The reader feeds the graph itself; run until it signals EOF.
            reader.start()
            try:
                while True:
                    record(executor.run(eval_graph, self.scope))
            except EOFException:
                reader.reset()
        else:
            for data in reader():
                record(executor.run(eval_graph, self.scope, data=data))

        # Average each output over all evaluated batches.
        result = list(np.mean(np.array(results), axis=0))
        _logger.info("Final eval result: {}={}".format(
            eval_graph.out_nodes.keys(), result))
        _logger.info('Finish evaluation')
        return result, eval_graph.out_nodes.keys()

    def put(self, key, value):
        """Store ``value`` under ``key`` in the shared key-value store."""
        self.k_v[key] = value

    def get(self, key):
        """Return the value stored under ``key``, or None if it is absent."""
        return self.k_v.get(key)
class Compressor(object):
    """
    The pass used to compress model.

    A compressor owns the training and evaluation graphs plus a list of
    strategies. run() drives the epoch loop and invokes the strategy
    callbacks, the evaluation and the checkpointing.
    """

    def __init__(self,
                 place,
                 scope,
                 train_program,
                 train_reader=None,
                 train_feed_list=None,
                 train_fetch_list=None,
                 eval_program=None,
                 eval_reader=None,
                 eval_feed_list=None,
                 eval_fetch_list=None,
                 eval_func=None,
                 save_eval_model=True,
                 prune_infer_model=None,
                 teacher_programs=[],
                 checkpoint_path=None,
                 train_optimizer=None,
                 distiller_optimizer=None,
                 search_space=None,
                 log_period=20):
        """
        Args:
            place(fluid.Place): The device place where the compression job running.
                                CPUPlace is used when it is None.
            scope(fluid.core.Scope): The scope used to run graph.
            train_program(Program): The main program to be compressed. It must have loss op.
            train_reader: The data reader used for training.
            train_feed_list(list): The input variables of the training program, as a list
                                   of tuples such as [('image', image.name), ('label', gt.name)].
                                   The first item is a user-defined and human-readable name,
                                   the second item is the name of Variable.
            train_fetch_list: The output variables of the training program, as pairs of a
                              user-defined name and the name of Variable. It is passed to
                              GraphWrapper unchecked.
            eval_program(Program): The program used for evaluation.
            eval_reader: The data reader used for evaluation. It can be None if eval_func is not None.
            eval_feed_list(list): The input variables of the evaluation program, in the same
                                  format as train_feed_list.
                                  It can be None if eval_func is not None.
            eval_fetch_list: The output variables of the evaluation program, in the same
                             format as train_fetch_list.
            eval_func(dict): Callback functions used to evaluate the compressed model.
                             The key is user-defined name and the value is a callback function.
                             The score returned from callback functions can be referenced in
                             config file by the key of eval_func. The args of callback function
                             are compressed eval_program and scope which store the compressed
                             parameters. Default: None.
            save_eval_model(bool): Whether to save eval model when saving checkpoints. Default: True.
            prune_infer_model(tuple|list): If prune_infer_model is not None, compressor will prune
                                           eval program into inference program according to inputs
                                           and outputs defined in prune_infer_model.
                                           prune_infer_model[0] is a list of input variables' names
                                           and prune_infer_model[1] is a list of output variables'
                                           names. If prune_infer_model is None, it will not save
                                           inference model. Default: None.
            teacher_programs: The teacher graphs used in distillation strategies.
            checkpoint_path(str): The directory used to save and load checkpoints.
                                  None disables checkpointing. Default: None.
            train_optimizer: The optimizer used to append backward ops and
                             optimization ops into train_graph.
            distiller_optimizer: The optimizer used by distillation strategies. In distillation
                                 strategy, this optimizer is used to minimize the combined loss of
                                 student-net and teacher-net while train_optimizer is used to
                                 minimize loss of student-net in fine-tune stage.
            search_space(slim.nas.SearchSpace): The instance that define the searching space.
                                 It must inherit slim.nas.SearchSpace class and overwrite the
                                 abstract methods.
            log_period(int): The period of print log of training.
        """
        assert train_feed_list is None or isinstance(
            train_feed_list, list
        ), "train_feed_list should be a list of tuple, such as [('image', image.name), ('label', gt.name)]"
        assert eval_feed_list is None or isinstance(
            eval_feed_list, list
        ), "eval_feed_list should be a list of tuple, such as [('image', image.name), ('label', gt.name)]"
        self.strategies = []
        self.epoch = 0
        self.place = CPUPlace() if place is None else place
        self.scope = scope
        self.train_graph = GraphWrapper(
            train_program, in_nodes=train_feed_list, out_nodes=train_fetch_list)
        self.eval_graph = GraphWrapper(
            eval_program, in_nodes=eval_feed_list, out_nodes=eval_fetch_list)
        self.train_reader = train_reader
        self.eval_reader = eval_reader
        self.eval_func = eval_func
        self.save_eval_model = save_eval_model
        self.prune_infer_model = prune_infer_model
        # The default list is only iterated here, never mutated.
        self.teacher_graphs = [
            GraphWrapper(teacher) for teacher in teacher_programs
        ]
        self.checkpoint = None
        self.checkpoint_path = checkpoint_path
        self.eval_epoch = 1
        self.train_optimizer = train_optimizer
        self.distiller_optimizer = distiller_optimizer
        self.init_model = None
        self.search_space = search_space
        self.log_period = log_period
        assert (log_period > 0)

    def _add_strategy(self, strategy):
        """
        Add a strategy to current compress pass.

        The total number of epochs grows to the strategy's end_epoch if needed.

        Args:
            strategy: The strategy to be added into current compress pass.
        """
        self.strategies.append(strategy)
        self.epoch = max(strategy.end_epoch, self.epoch)

    def config(self, config_file):
        """
        Configure the compress pass from file with yaml format.

        Args:
            config_file(str): The config file in local file system.
        """
        settings = ConfigFactory(config_file).compressor
        self.epoch = settings['epoch']
        for strategy in settings['strategies']:
            self._add_strategy(strategy)
        if 'checkpoint_path' in settings:
            self.checkpoint_path = settings['checkpoint_path']
        if 'init_model' in settings:
            self.init_model = settings['init_model']
        if 'eval_epoch' in settings:
            self.eval_epoch = settings['eval_epoch']
            assert (self.eval_epoch > 0)

    def _init_model(self, context):
        """
        Load model that has been compressed.

        Nothing happens unless self.init_model names an existing path.
        """
        if not (self.init_model and os.path.exists(self.init_model)):
            return
        exe = SlimGraphExecutor(context.place)
        with scope_guard(context.scope):
            context.train_graph.load_persistables(self.init_model, exe)
        # Remember the flops before the graphs are synced with the loaded
        # parameters, so the reduction can be reported.
        flops = context.eval_graph.flops()
        conv_flops = context.eval_graph.flops(only_conv=True)
        context.eval_graph.update_param_shape(context.scope)
        context.eval_graph.update_groups_of_conv()
        _logger.info("conv flops: -{}".format(1 - float(
            context.eval_graph.flops(only_conv=True)) / conv_flops))
        _logger.info("total flops: -{}".format(1 - float(
            context.eval_graph.flops()) / flops))
        context.train_graph.update_param_shape(context.scope)
        context.train_graph.update_groups_of_conv()
        context.train_graph.infer_shape()
        _logger.info("Init model from: {}".format(self.init_model))

    def _load_checkpoint(self, context):
        """
        Load checkpoints from file.

        The checkpoint stored in the sub-directory with the largest numeric
        name is used. Sub-directories whose names are not numeric are ignored.

        Returns:
            tuple: The context and the list of strategies to use from now on.
        """
        _logger.debug('_load_checkpoint')
        strategies = self.strategies
        if not self.checkpoint_path:
            return context, strategies
        if not os.path.exists(self.checkpoint_path):
            _logger.warning("Checkpints path doesn't exist: [{}]".format(
                self.checkpoint_path))
            return context, strategies

        checkpoints = [
            entry for entry in os.listdir(self.checkpoint_path)
            if os.path.isdir(os.path.join(self.checkpoint_path, entry))
        ]
        _logger.debug('self.checkpoint_path: {}'.format(self.checkpoint_path))
        _logger.info('checkpoints: {}'.format(checkpoints))
        # Checkpoint directories are named after their epoch id; skip anything
        # else instead of failing on int().
        epoch_dirs = [entry for entry in checkpoints if entry.isdigit()]
        if not epoch_dirs:
            return context, strategies

        latest_ck_path = os.path.join(self.checkpoint_path,
                                      max(epoch_dirs, key=int))
        model_path = os.path.join(latest_ck_path, 'model')
        context_path = os.path.join(latest_ck_path, 'context')
        strategy_path = os.path.join(latest_ck_path, 'strategies')

        if os.path.exists(context_path):
            context.from_file(context_path)
            # Resume with the epoch after the one that was checkpointed.
            context.epoch_id += 1

        if os.path.exists(strategy_path):
            # NOTE(security): pickle can execute arbitrary code while loading;
            # only load checkpoints produced by a trusted run.
            with open(strategy_path, 'rb') as strategy_file:
                if sys.version_info < (3, 0):
                    strategies = pickle.load(strategy_file)
                else:
                    strategies = pickle.load(strategy_file, encoding='bytes')
            assert (len(self.strategies) == len(strategies))
            # The currently configured attributes override the restored ones.
            for configured, restored in zip(self.strategies, strategies):
                restored.__dict__.update(configured.__dict__)

        for strategy in strategies:
            strategy.restore_from_checkpoint(context)

        if os.path.exists(model_path):
            exe = SlimGraphExecutor(context.place)
            with scope_guard(context.scope):
                context.optimize_graph.load_persistables(model_path, exe)
            _logger.info("Loaded params from: {}".format(model_path))
        return context, strategies

    def _save_checkpoint(self, context):
        """
        Save checkpoints to file.

        A checkpoint is written to '<checkpoint_path>/<epoch_id>'. Nothing is
        saved when no checkpoint path is configured.
        """
        if not self.checkpoint_path:
            return
        checkpoint_path = os.path.join(self.checkpoint_path,
                                       str(context.epoch_id))
        model_path = os.path.join(checkpoint_path, 'model')
        eval_model_path = os.path.join(checkpoint_path, 'eval_model')
        context_path = os.path.join(checkpoint_path, 'context')
        strategy_path = os.path.join(checkpoint_path, 'strategies')
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        exe = SlimGraphExecutor(context.place)
        with scope_guard(context.scope):
            context.optimize_graph.save_persistables(model_path, exe)
            if self.save_eval_model:
                context.eval_graph.save_model(eval_model_path, exe)
            if self.prune_infer_model:
                context.eval_graph.save_infer_model(
                    eval_model_path,
                    exe,
                    self.prune_infer_model,
                    program_only=self.save_eval_model)
        context.to_file(context_path)
        with open(strategy_path, 'wb') as strategy_file:
            pickle.dump(self.strategies, strategy_file)
        _logger.info('Saved checkpoint to: {}'.format(checkpoint_path))

    def _run_train_batch(self, context, executor, **run_kwargs):
        """
        Run one training batch surrounded by the strategies' batch callbacks.

        ``run_kwargs`` are forwarded to ``executor.run``; they carry the batch
        data when the reader does not feed the graph by itself.
        """
        for strategy in self.strategies:
            strategy.on_batch_begin(context)
        results = executor.run(context.optimize_graph, context.scope,
                               **run_kwargs)
        results = [float(np.mean(result)) for result in results]
        if context.batch_id % self.log_period == 0:
            _logger.info("epoch:{}; batch_id:{}; {} = {}".format(
                context.epoch_id, context.batch_id,
                context.optimize_graph.out_nodes.keys(),
                [round(r, 6) for r in results]))
        for strategy in self.strategies:
            strategy.on_batch_end(context)
        context.batch_id += 1

    def _train_one_epoch(self, context):
        """
        Train one epoch.

        Nothing is done when context.skip_training is set.
        """
        if context.skip_training:
            return
        executor = SlimGraphExecutor(self.place)
        if context.optimize_graph.compiled_graph is None:
            build_strategy = compiler.BuildStrategy()
            build_strategy.fuse_all_reduce_ops = False
            context.optimize_graph.compiled_graph = compiler.CompiledProgram(
                context.optimize_graph.program).with_data_parallel(
                    loss_name=context.optimize_graph.out_nodes['loss'],
                    build_strategy=build_strategy)

        reader = context.train_reader
        if isinstance(reader, Variable) or (
                isinstance(reader, DataLoaderBase) and (not reader.iterable)):
            # The reader feeds the graph itself; run until it signals EOF.
            reader.start()
            try:
                while True:
                    self._run_train_batch(context, executor)
            except EOFException:
                reader.reset()
        else:
            for data in reader():
                self._run_train_batch(context, executor, data=data)
        context.batch_id = 0

    def _eval(self, context):
        """
        Runing evaluation.

        When eval_func is given, every callback is called with the evaluation
        program and the scope. Otherwise the evaluation graph of the context
        is run. Each score is appended to context.eval_results under its name.
        """
        if self.eval_func is not None:
            for key in self.eval_func:
                func = self.eval_func[key]
                context.eval_results.setdefault(key, []).append(
                    func(self.eval_graph.program, self.scope))
        else:
            results, names = context.run_eval_graph()
            for name, result in zip(names, results):
                context.eval_results.setdefault(name, []).append(result)

    def run(self):
        """
        Execute compressing pass.

        Returns:
            The evaluation graph of the context after compression, or None
            when a MKLDNNPostTrainingQuantStrategy is among the strategies.
        """
        context = Context(
            place=self.place,
            scope=self.scope,
            train_graph=self.train_graph,
            train_reader=self.train_reader,
            eval_graph=self.eval_graph,
            eval_reader=self.eval_reader,
            teacher_graphs=self.teacher_graphs,
            train_optimizer=self.train_optimizer,
            distiller_optimizer=self.distiller_optimizer,
            search_space=self.search_space)
        self.context = context
        if self.teacher_graphs:
            context.put('teachers', self.teacher_graphs)
        self._init_model(context)
        if not context.optimize_graph:
            if context.train_optimizer:
                context.train_optimizer._name = 'train_opt'
                context.optimize_graph = context.train_graph.get_optimize_graph(
                    context.train_optimizer, context.place, context.scope)
            else:
                context.optimize_graph = context.train_graph

        context, self.strategies = self._load_checkpoint(context)

        for strategy in self.strategies:
            strategy.on_compression_begin(context)
        # This strategy does all of its work in on_compression_begin.
        if any(strategy.__class__.__name__ == 'MKLDNNPostTrainingQuantStrategy'
               for strategy in self.strategies):
            return None

        start = context.epoch_id
        for epoch in range(start, self.epoch):
            context.epoch_id = epoch
            try:
                for strategy in self.strategies:
                    strategy.on_epoch_begin(context)
                self._train_one_epoch(context)
                if self.eval_epoch and epoch % self.eval_epoch == 0:
                    self._eval(context)
                self._save_checkpoint(context)
                for strategy in self.strategies:
                    strategy.on_epoch_end(context)
            except Exception:
                # Best effort: report the failure and go on with the next
                # epoch. format_exc() returns the traceback text, whereas
                # print_exc() returns None.
                _logger.error(traceback.format_exc())
                continue
        for strategy in self.strategies:
            strategy.on_compression_end(context)
        return context.eval_graph
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import funcsigs
import yaml
from collections import OrderedDict
from ..prune import *
from ..quantization import *
from .strategy import *
from ..distillation import *
from ..searcher import *
from ..nas import *
# Public names exported by this module.
__all__ = ['ConfigFactory']
"""This factory is used to create instances by loading and parsing configure file with yaml format.
"""
# Top-level sections of a config file whose entries ConfigFactory instantiates.
PLUGINS = ['pruners', 'quantizers', 'distillers', 'strategies', 'controllers']
class ConfigFactory(object):
    """
    Factory that creates instances by loading and parsing a configure file
    with yaml format.

    Entries of the sections listed in PLUGINS are instantiated and kept in
    ``instances`` by name. The 'compressor' section is collected into the
    ``compressor`` dict. Files named in an 'include' section are parsed too.
    """

    def __init__(self, config):
        """
        Init a factory from configure file.

        Args:
            config(str): The path of the yaml config file.
        """
        self.instances = {}
        self.compressor = {}
        self.version = None
        self._parse_config(config)

    def instance(self, name):
        """
        Get instance from factory.

        Returns:
            The instance registered under ``name``, or None if there is none.
        """
        return self.instances.get(name)

    def _resolve(self, value):
        """
        Translate a raw config value into a constructor argument.

        The string 'none' (in any letter case) becomes None. A string naming
        an already created instance is replaced by that instance. The same
        replacement is applied in place to the items of a list.
        """
        if isinstance(value, str):
            if value.lower() == 'none':
                return None
            if value in self.instances:
                return self.instances[value]
        elif isinstance(value, list):
            for index, item in enumerate(value):
                if isinstance(item, str) and item in self.instances:
                    value[index] = self.instances[item]
        return value

    def _new_instance(self, name, attrs):
        """
        Create the instance described by ``attrs`` unless ``name`` already exists.

        ``attrs['class']`` names a class visible in this module's globals.
        Only the attributes matching a positional-or-keyword parameter of the
        class constructor are passed on; the others are ignored.

        Returns:
            The instance registered under ``name``.
        """
        if name not in self.instances:
            class_ = globals()[attrs['class']]
            sig = funcsigs.signature(class_.__init__)
            # Drop the first parameter, which is `self`.
            accepted = [
                param.name for param in sig.parameters.values()
                if (param.kind == param.POSITIONAL_OR_KEYWORD)
            ][1:]
            args = {}
            for key in set(attrs.keys()).intersection(accepted):
                args[key] = self._resolve(attrs[key])
            self.instances[name] = class_(**args)
        return self.instances.get(name)

    def _parse_config(self, config):
        """
        Parse one config file and, recursively, the files it includes.

        Args:
            config(str): The path of the yaml config file.

        Raises:
            ValueError: If the 'compressor' section names a strategy that has
                not been defined before that section.
        """
        assert config
        # Load the whole file first so that it is closed before any included
        # file is opened.
        with open(config, 'r') as config_file:
            key_values = self._ordered_load(config_file)

        for key in key_values:
            # parse version; only the first version encountered is recorded.
            if key == 'version' and self.version is None:
                self.version = int(key_values['version'])
                assert self.version == int(key_values['version'])

            # parse plugins
            if key in PLUGINS:
                instances = key_values[key]
                for name in instances:
                    self._new_instance(name, instances[name])

            if key == 'compressor':
                section = key_values[key]
                self.compressor['strategies'] = []
                self.compressor['epoch'] = section['epoch']
                for option in ('init_model', 'checkpoint_path', 'eval_epoch'):
                    if option in section:
                        self.compressor[option] = section[option]
                if 'strategies' in section:
                    for name in section['strategies']:
                        strategy = self.instance(name)
                        if strategy is None:
                            # Fail here with a clear message instead of
                            # handing None to the compressor.
                            raise ValueError(
                                "Strategy [{}] is used by the compressor but "
                                "has not been defined.".format(name))
                        self.compressor['strategies'].append(strategy)

            if key == 'include':
                for included_file in key_values[key]:
                    self._parse_config(included_file.strip())

    def _ordered_load(self,
                      stream,
                      Loader=yaml.Loader,
                      object_pairs_hook=OrderedDict):
        """
        Load yaml from ``stream`` with mappings built by ``object_pairs_hook``,
        so the order of the keys in the file is kept.

        See: https://stackoverflow.com/questions/5121931/in-python-how-can-you-load-yaml-mappings-as-ordereddicts
        """

        # NOTE(security): the default yaml.Loader can construct arbitrary
        # Python objects; only load config files from a trusted source.
        class OrderedLoader(Loader):
            pass

        def construct_mapping(loader, node):
            loader.flatten_mapping(node)
            return object_pairs_hook(loader.construct_pairs(node))

        OrderedLoader.add_constructor(
            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping)
        return yaml.load(stream, OrderedLoader)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Public names exported by this module.
__all__ = ['Strategy']
class Strategy(object):
    """
    Base class for all strategies.

    Subclasses override the ``on_*`` hooks they are interested in; every hook
    of this base class does nothing.
    """

    def __init__(self, start_epoch=0, end_epoch=0):
        """
        Args:
            start_epoch: The first epoch to apply the strategy.
            end_epoch: The last epoch to apply the strategy.
        """
        self.start_epoch = start_epoch
        self.end_epoch = end_epoch

    def __getstate__(self):
        """Return the state to pickle: all attributes except the epoch range."""
        excluded = ["start_epoch", "end_epoch"]
        return {
            name: value
            for name, value in self.__dict__.items() if name not in excluded
        }

    def on_compression_begin(self, context):
        """Hook called with the context when the compression begins."""

    def on_epoch_begin(self, context):
        """Hook called with the context when an epoch begins."""

    def on_epoch_end(self, context):
        """Hook called with the context when an epoch ends."""

    def on_batch_begin(self, context):
        """Hook called with the context when a batch begins."""

    def on_batch_end(self, context):
        """Hook called with the context when a batch ends."""

    def on_compression_end(self, context):
        """Hook called with the context when the compression ends."""

    def restore_from_checkpoint(self, context):
        """Hook called with the context to restore the strategy from a checkpoint."""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import distiller
from .distiller import *
from . import distillation_strategy
from .distillation_strategy import *

# Build a new list instead of aliasing distiller.__all__: an in-place `+=` on
# the alias would also append the strategy names to distiller.__all__ itself.
__all__ = distiller.__all__ + distillation_strategy.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..core.strategy import Strategy
from ....framework import Program, Variable, program_guard
from ....log_helper import get_logger
from .... import Executor
import logging
# Public names exported by this module.
__all__ = ['DistillationStrategy']
# Module-level logger at INFO level; records look like "<time>-<level>: <message>".
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class DistillationStrategy(Strategy):
    """
    The strategy that trains the student graph together with a teacher graph.

    At start_epoch the optimize graph of the context is replaced by a
    distillation graph; at the end of epoch (end_epoch - 1) the previous
    optimize graph is put back.
    """

    def __init__(self, distillers=None, start_epoch=0, end_epoch=0):
        """
        Args:
            distillers(list): A list of distiller used to combine student graph and teacher graph
                              by adding some loss. None is treated as an empty list.
            start_epoch(int): The epoch when to merge student graph and teacher graph for
                              distillation training. default: 0
            end_epoch(int): The epoch when to finish distillation training. default: 0
        """
        super(DistillationStrategy, self).__init__(start_epoch, end_epoch)
        self.distillers = distillers

    def restore_from_checkpoint(self, context):
        """
        Rebuild the distillation graph when training is resumed in the middle
        of the distillation stage, i.e. strictly between start_epoch and
        end_epoch.
        """
        # load from checkpoint
        if context.epoch_id > 0 and (
                self.start_epoch < context.epoch_id < self.end_epoch):
            _logger.info('Restore DistillationStrategy')
            self._create_distillation_graph(context)
            _logger.info('Restore DistillationStrategy finish.')

    def on_epoch_begin(self, context):
        """Create the distillation graph when start_epoch is reached."""
        if self.start_epoch == context.epoch_id:
            _logger.info('DistillationStrategy::on_epoch_begin.')
            self._create_distillation_graph(context)
            _logger.info('DistillationStrategy set optimize_graph.')

    def _create_distillation_graph(self, context):
        """
        step 1: Merge student graph and teacher graph into distillation graph.
        step 2: Add loss into distillation graph by distillers.
        step 3: Append backward ops and optimize ops into distillation graph for training.

        The optimize graph that was active before is saved in the context
        under 'distillation_backup_optimize_graph'.
        """
        # step 1; only the first teacher graph is used.
        teacher = context.teacher_graphs[0]
        for var in teacher.program.list_vars():
            var.stop_gradient = True
        graph = context.train_graph.clone()
        graph.merge(teacher)
        if 'loss' in graph.out_nodes:
            graph.out_nodes['student_loss'] = graph.out_nodes['loss']

        # step 2; the default `distillers=None` means there is nothing to add.
        for distiller in self.distillers or []:
            graph = distiller.distiller_loss(graph)

        # step 3
        startup_program = Program()
        with program_guard(graph.program, startup_program):
            context.distiller_optimizer._name = 'distillation_optimizer'

            # The learning rate variable may be created in other program.
            # Update information in optimizer to make
            # learning rate variable being accessible in current program.
            optimizer = context.distiller_optimizer
            if isinstance(optimizer._learning_rate, Variable):
                optimizer._learning_rate_map[
                    graph.program] = optimizer._learning_rate

            optimizer.minimize(graph.var(graph.out_nodes['loss'])._var)

        exe = Executor(context.place)
        exe.run(startup_program, scope=context.scope)

        # backup graph for fine-tune after distillation
        context.put('distillation_backup_optimize_graph',
                    context.optimize_graph)
        context.optimize_graph = graph

    def on_epoch_end(self, context):
        """Put the backed-up optimize graph back after the last distillation epoch."""
        if context.epoch_id == (self.end_epoch - 1):
            _logger.info('DistillationStrategy::on_epoch_end.')
            # restore optimize_graph for fine-tune or other strategy in next stage.
            context.optimize_graph = context.get(
                'distillation_backup_optimize_graph')
            # The previous message claimed the graph was set to None, which is
            # not what the assignment above does.
            _logger.info(
                'DistillationStrategy restored context.optimize_graph from backup.'
            )
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .... import layers
from .... import optimizer
from .... import Executor
from .... import Program
from .... import program_guard
from .... import regularizer
# Public names exported by this module.
__all__ = ['FSPDistiller', 'L2Distiller', 'SoftLabelDistiller']
class L2Distiller(object):
    """
    Distiller that ties one student feature map to one teacher feature map
    with an l2-loss, which is added to the total loss of distillation training.
    """

    def __init__(self,
                 student_feature_map,
                 teacher_feature_map,
                 distillation_loss_weight=1):
        """
        Args:
            student_feature_map(str): Name of the feature map in the student network.
            teacher_feature_map(str): Name of the feature map in the teacher network.
                                      Its shape should equal the student's one.
            distillation_loss_weight(float): Weight applied to the l2-loss.
        """
        self.student_feature_map = student_feature_map
        self.teacher_feature_map = teacher_feature_map
        self.distillation_loss_weight = distillation_loss_weight

    def distiller_loss(self, graph):
        """
        Add the l2-loss to the graph; the graph is modified inplace.

        Args:
            graph(GraphWrapper): The graph to be modified.

        Returns:
            GraphWrapper: The modified graph.
        """
        return L2DistillerPass(self.student_feature_map,
                               self.teacher_feature_map,
                               self.distillation_loss_weight).apply(graph)
class L2DistillerPass(object):
    """
    The pass that appends an l2-loss between a student feature map and a
    teacher feature map to a graph.
    """

    def __init__(self,
                 student_feature_map,
                 teacher_feature_map,
                 distillation_loss_weight=1):
        """
        Args:
            student_feature_map(str): The name of feature map from student network.
            teacher_feature_map(str): The name of feature map from teacher network.
                                      Its shape should be the same as the student's.
            distillation_loss_weight(float): The weight of the l2-loss.
        """
        self.distillation_loss_weight = distillation_loss_weight
        self.teacher_feature_map = teacher_feature_map
        self.student_feature_map = student_feature_map

    def apply(self, graph):
        """
        Add the weighted l2-loss to the graph and register it in out_nodes.

        The graph is modified in place: out_nodes['loss'] is pointed at the
        sum of the distillation loss and the previous 'loss' node (if any),
        and the distillation loss itself is registered under a key built
        from the two feature map names.

        Args:
            graph(GraphWrapper): The graph to be modified.
        Returns:
            GraphWrapper: The same graph object that was passed in.
        """
        out_nodes = graph.out_nodes
        with program_guard(graph.program):
            student_var = graph.var(self.student_feature_map)._var
            teacher_var = graph.var(self.teacher_feature_map)._var
            squared_diff = layers.square(student_var - teacher_var)
            mean_l2 = layers.reduce_mean(squared_diff)
            distillation_loss = mean_l2 * self.distillation_loss_weight
            # Without an existing 'loss' node the student contributes 0.
            if 'loss' in out_nodes:
                student_loss = graph.var(out_nodes['loss'])._var
            else:
                student_loss = 0
            total_loss = distillation_loss + student_loss
            out_nodes['loss'] = total_loss.name
            loss_key = ('l2loss_' + self.student_feature_map + "_" +
                        self.teacher_feature_map)
            out_nodes[loss_key] = distillation_loss.name
        return graph
class FSPDistiller(object):
    """
    Distiller that couples sections of the student net with sections of the
    teacher net through an fsp-loss.
    """

    def __init__(self, student_pairs, teacher_pairs,
                 distillation_loss_weight=1):
        """
        Args:
            student_pairs(list<tuple>): Each tuple holds two variable names and
                                        indicates a section in student network.
                                        The variables in a tuple should have the
                                        same feature map size.
            teacher_pairs(list<tuple>): Each tuple holds two variable names and
                                        indicates a section in teacher network.
                                        The variables in a tuple should have the
                                        same feature map size. The variable named
                                        teacher_pairs[i][j] should have the same
                                        channel number as the variable named
                                        student_pairs[i][j].
            distillation_loss_weight(float): The weight of the fsp-loss. default: 1.
        """
        self.distillation_loss_weight = distillation_loss_weight
        self.teacher_pairs = teacher_pairs
        self.student_pairs = student_pairs

    def distiller_loss(self, graph):
        """
        Modify graph inplace to add fsp-loss.

        Args:
            graph(GraphWrapper): The graph to be modified.
        Returns:
            GraphWrapper: The modified graph.
        """
        fsp_pass = FSPDistillerPass(self.student_pairs, self.teacher_pairs,
                                    self.distillation_loss_weight)
        return fsp_pass.apply(graph)
class FSPDistillerPass(object):
    '''
    The pass that appends an fsp-loss between sections of the student net
    and sections of the teacher net to a graph.
    '''

    def __init__(self, s_pairs, t_pairs, distillation_loss_weight=1):
        """
        Args:
            s_pairs(list<tuple>): Each tuple holds two variable names and
                                  indicates a section in student network. The
                                  variables in a tuple should have the same
                                  feature map size.
            t_pairs(list<tuple>): Each tuple holds two variable names and
                                  indicates a section in teacher network. The
                                  variables in a tuple should have the same
                                  feature map size. The variable named
                                  t_pairs[i][j] should have the same channel
                                  number as the variable named s_pairs[i][j].
            distillation_loss_weight(float): The weight of the fsp-loss. default: 1.
        """
        self.distillation_loss_weight = distillation_loss_weight
        self.t_pairs = t_pairs
        self.s_pairs = s_pairs

    def apply(self, graph):
        """
        Add the weighted fsp-loss to the graph and register it in out_nodes.

        Student and teacher sections are matched positionally with zip().
        The graph is modified in place: out_nodes['loss'] is pointed at the
        sum of the distillation loss and the previous 'loss' node (if any).

        Args:
            graph(GraphWrapper): The graph to be modified.
        Returns:
            GraphWrapper: The same graph object that was passed in.
        """
        out_nodes = graph.out_nodes
        with program_guard(graph.program):
            section_losses = [
                self._section_loss(graph, s_pair, t_pair)
                for s_pair, t_pair in zip(self.s_pairs, self.t_pairs)
            ]
            summed = layers.sum(section_losses)
            distillation_loss = summed * self.distillation_loss_weight
            # Without an existing 'loss' node the student contributes 0.
            if 'loss' in out_nodes:
                student_loss = graph.var(out_nodes['loss'])._var
            else:
                student_loss = 0
            total_loss = distillation_loss + student_loss
            out_nodes['loss'] = total_loss.name
            out_nodes['fsp_distillation_loss'] = distillation_loss.name
        return graph

    def _section_loss(self, graph, s_pair, t_pair):
        """Mean squared difference of the student and teacher fsp matrices."""
        s_start = graph.var(s_pair[0])._var
        s_end = graph.var(s_pair[1])._var
        s_matrix = self._fsp_matrix(s_start, s_end)
        t_start = graph.var(t_pair[0])._var
        t_end = graph.var(t_pair[1])._var
        t_matrix = self._fsp_matrix(t_start, t_end)
        return layers.reduce_mean(layers.square(s_matrix - t_matrix))

    def _fsp_matrix(self, fea_map_0, fea_map_1):
        """Thin wrapper over layers.fsp_matrix for the two feature maps."""
        return layers.fsp_matrix(fea_map_0, fea_map_1)
class SoftLabelDistiller(object):
    """
    Distiller that couples one student feature map with one teacher feature
    map through a soft-label cross entropy loss and adds that loss to the
    total training loss.
    """

    def __init__(self,
                 student_feature_map=None,
                 teacher_feature_map=None,
                 student_temperature=1.0,
                 teacher_temperature=1.0,
                 distillation_loss_weight=1):
        """
        Args:
            student_feature_map(str): The name of feature map from student network.
            teacher_feature_map(str): The name of feature map from teacher network.
                                      Its shape should be the same as the student's.
            student_temperature(float): Temperature used to divide student_feature_map
                                        before softmax. default: 1.0
            teacher_temperature(float): Temperature used to divide teacher_feature_map
                                        before softmax. default: 1.0
            distillation_loss_weight(float): The weight of the soft-label loss.
        """
        self.teacher_temperature = teacher_temperature
        self.student_temperature = student_temperature
        self.distillation_loss_weight = distillation_loss_weight
        self.teacher_feature_map = teacher_feature_map
        self.student_feature_map = student_feature_map

    def distiller_loss(self, graph):
        """
        Modify graph inplace to add the soft-label loss.

        Args:
            graph(GraphWrapper): The graph to be modified.
        Returns:
            GraphWrapper: The modified graph.
        """
        soft_label_pass = SoftLabelDistillerPass(
            self.student_feature_map, self.teacher_feature_map,
            self.student_temperature, self.teacher_temperature,
            self.distillation_loss_weight)
        return soft_label_pass.apply(graph)
class SoftLabelDistillerPass(object):
    """
    The pass that appends a soft-label cross entropy loss between a student
    feature map and a teacher feature map to a graph.
    """

    def __init__(self,
                 student_feature_map,
                 teacher_feature_map,
                 student_temperature,
                 teacher_temperature,
                 distillation_loss_weight=1):
        """
        Args:
            student_feature_map(str): The name of feature map from student network.
            teacher_feature_map(str): The name of feature map from teacher network.
                                      Its shape should be the same as the student's.
            student_temperature(float): Temperature used to divide student_feature_map
                                        before softmax.
            teacher_temperature(float): Temperature used to divide teacher_feature_map
                                        before softmax.
            distillation_loss_weight(float): The weight of the soft-label loss.
        """
        self.distillation_loss_weight = distillation_loss_weight
        self.teacher_temperature = teacher_temperature
        self.student_temperature = student_temperature
        self.teacher_feature_map = teacher_feature_map
        self.student_feature_map = student_feature_map

    def apply(self, graph):
        """
        Add the weighted soft-label loss to the graph and register it in out_nodes.

        Both feature maps are divided by their temperature and passed through
        softmax; the teacher side has stop_gradient set. The graph is modified
        in place: out_nodes['loss'] is pointed at the sum of the distillation
        loss and the previous 'loss' node (if any).

        Args:
            graph(GraphWrapper): The graph to be modified.
        Returns:
            GraphWrapper: The same graph object that was passed in.
        """
        out_nodes = graph.out_nodes
        with program_guard(graph.program):
            student_var = graph.var(self.student_feature_map)._var
            teacher_var = graph.var(self.teacher_feature_map)._var
            s_fea = layers.softmax(student_var / self.student_temperature)
            t_fea = layers.softmax(teacher_var / self.teacher_temperature)
            t_fea.stop_gradient = True
            cross_entropy = layers.cross_entropy(s_fea, t_fea, soft_label=True)
            ce_loss = layers.reduce_mean(cross_entropy)
            distillation_loss = ce_loss * self.distillation_loss_weight
            # Without an existing 'loss' node the student contributes 0.
            if 'loss' in out_nodes:
                student_loss = graph.var(out_nodes['loss'])._var
            else:
                student_loss = 0
            total_loss = distillation_loss + student_loss
            out_nodes['loss'] = total_loss.name
            loss_key = ('soft_label_loss_' + self.student_feature_map + "_" +
                        self.teacher_feature_map)
            out_nodes[loss_key] = distillation_loss.name
        return graph
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import executor
from .executor import *
from . import graph_wrapper
from .graph_wrapper import *
__all__ = executor.__all__
__all__ += graph_wrapper.__all__
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ....compiler import CompiledProgram
from ....data_feeder import DataFeeder
from .... import executor
from .graph_wrapper import GraphWrapper
__all__ = ['SlimGraphExecutor']
class SlimGraphExecutor(object):
    """
    Wrapper of executor used to run GraphWrapper.
    """

    def __init__(self, place):
        """
        Args:
            place: The place handed to the underlying executor and, when
                   feeding sample tuples, to the DataFeeder.
        """
        self.place = place
        self.exe = executor.Executor(place)

    def run(self, graph, scope, data=None):
        """
        Run a graph with a batch of data.

        Args:
            graph(GraphWrapper): The graph to be executed.
            scope(fluid.core.Scope): The scope to be used.
            data(list<tuple>|list<dict>): A batch of data. When the first item
                 is a dict the batch is used as the feed unchanged; otherwise
                 each tuple is a sample whose items are fed to the in_nodes
                 of the graph through a DataFeeder. None means no feed.
        Returns:
            results(list): A list of results in the order indicated by graph.out_nodes.
        """
        assert isinstance(graph, GraphWrapper)
        if data is None:
            feed = None
        elif isinstance(data[0], dict):
            # Already in feed form; pass through untouched.
            feed = data
        else:
            feed = DataFeeder(
                feed_list=list(graph.in_nodes.values()),
                place=self.place,
                program=graph.program).feed(data)
        # Prefer the compiled graph when compile() has produced one.
        target = graph.compiled_graph or graph.program
        return self.exe.run(target,
                            scope=scope,
                            fetch_list=list(graph.out_nodes.values()),
                            feed=feed)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from .... import io
from .... import compiler
from ....framework import Program
from ....framework import program_guard
from ....framework import Parameter
from ....framework import Variable
from ....executor import Executor
import copy
from collections import Iterable
from ....io import save_inference_model, load_inference_model, save_persistables
import numpy as np
import pickle
import os
__all__ = ['GraphWrapper', 'VarWrapper', 'OpWrapper']
# Operator type names that OpWrapper.is_opt_op() treats as optimizer ops.
OPTIMIZER_OPS = [
    'momentum', 'lars_momentum', 'adagrad',
    'adam', 'adamax', 'dpsgd',
    'decayed_adagrad', 'adadelta', 'rmsprop',
]
class VarWrapper(object):
    """
    A thin wrapper of a framework Variable that remembers the GraphWrapper
    it belongs to. Two wrappers are equal when their variables share a name.
    """

    def __init__(self, var, graph):
        """
        Args:
            var(framework.Variable): The variable to wrap.
            graph(GraphWrapper): The graph that owns the variable.
        """
        assert isinstance(var, Variable)
        assert isinstance(graph, GraphWrapper)
        self._var = var
        self._graph = graph

    def __eq__(self, v):
        """
        Overwrite this function for ...in... syntax in python.

        Comparison is by variable name. Objects that do not carry a
        ``_var.name`` yield NotImplemented (so ``==`` evaluates to False)
        instead of raising AttributeError.
        """
        other_var = getattr(v, '_var', None)
        if other_var is None or not hasattr(other_var, 'name'):
            return NotImplemented
        return self._var.name == other_var.name

    def __ne__(self, v):
        """Inverse of __eq__; spelled out because Python 2 does not derive it."""
        result = self.__eq__(v)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """Hash by variable name so that equal wrappers hash equally."""
        return hash(self._var.name)

    def name(self):
        """
        Get the name of the variable.
        """
        return self._var.name

    def shape(self):
        """
        Get the shape of the variable.
        """
        return self._var.shape

    def set_shape(self, shape):
        """
        Set the shape of the variable.
        """
        self._var.desc.set_shape(shape)

    def inputs(self):
        """
        Get all the operators of the graph that list this variable among
        their input variables (``op.all_inputs()``).

        NOTE(review): the method name suggests the producers of this
        variable, but the check is against op inputs; confirm against
        callers before changing.

        Returns:
            list<OpWrapper>: A list of operators.
        """
        return [op for op in self._graph.ops() if self in op.all_inputs()]

    def outputs(self):
        """
        Get all the operators of the graph that list this variable among
        their output variables (``op.all_outputs()``).

        NOTE(review): the method name suggests the consumers of this
        variable, but the check is against op outputs; confirm against
        callers before changing.

        Returns:
            list<OpWrapper>: A list of operators.
        """
        return [op for op in self._graph.ops() if self in op.all_outputs()]
class OpWrapper(object):
    """
    A thin wrapper of a framework operator that remembers the GraphWrapper
    it belongs to. Two wrappers are equal when their operators share an idx.
    """

    def __init__(self, op, graph):
        """
        Args:
            op: The framework operator to wrap.
            graph(GraphWrapper): The graph that owns the operator.
        """
        assert isinstance(graph, GraphWrapper)
        self._op = op
        self._graph = graph

    def __eq__(self, op):
        """
        Overwrite this function for ...in... syntax in python.

        Comparison is by operator idx. Objects without a callable ``idx``
        yield NotImplemented (so ``==`` evaluates to False) instead of
        raising AttributeError.
        """
        other_idx = getattr(op, 'idx', None)
        if not callable(other_idx):
            return NotImplemented
        return self.idx() == other_idx()

    def __ne__(self, op):
        """Inverse of __eq__; spelled out because Python 2 does not derive it."""
        result = self.__eq__(op)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """Hash by operator idx so that equal wrappers hash equally."""
        return hash(self.idx())

    def all_inputs(self):
        """
        Get all the input variables of this operator.
        """
        return [
            self._graph.var(var_name) for var_name in self._op.input_arg_names
        ]

    def all_outputs(self):
        """
        Get all the output variables of this operator.
        """
        return [
            self._graph.var(var_name) for var_name in self._op.output_arg_names
        ]

    def idx(self):
        """
        Get the id of this operator.
        """
        return self._op.idx

    def type(self):
        """
        Get the type of this operator.
        """
        return self._op.type

    def is_bwd_op(self):
        """
        Whether this operator is backward op, i.e. its type ends with '_grad'.
        """
        return self.type().endswith('_grad')

    def is_opt_op(self):
        """
        Whether this operator is optimizer op, i.e. its type is listed in
        OPTIMIZER_OPS.
        """
        return self.type() in OPTIMIZER_OPS

    def inputs(self, name):
        """
        Get all the variables by the input name.
        """
        return [self._graph.var(var_name) for var_name in self._op.input(name)]

    def outputs(self, name):
        """
        Get all the variables by the output name.
        """
        return [self._graph.var(var_name) for var_name in self._op.output(name)]

    def set_attr(self, key, value):
        """
        Set the value of attribute by attribute's name.

        Args:
            key(str): the attribute name.
            value(bool|int|str|float|list): the value of the attribute.
        """
        self._op._set_attr(key, value)

    def attr(self, name):
        """
        Get the attribute by name.

        Args:
            name(str): the attribute name.
        Returns:
            bool|int|str|float|list: The attribute value. The return value
            can be any valid attribute type.
        """
        return self._op.attr(name)
class GraphWrapper(object):
    """
    It is a wrapper of a framework.Program together with named input and
    output nodes, with some special functions for paddle slim framework.
    """

    def __init__(self, program=None, in_nodes=None, out_nodes=None):
        """
        Args:
            program(framework.Program): The program to be wrapped. A new empty
                                        Program is created when it is None.
            in_nodes(dict): A dict to indicate the input nodes of the graph.
                            The key is user-defined and human-readable name.
                            The value is the name of Variable.
            out_nodes(dict): A dict to indicate the output nodes of the graph.
                             The key is user-defined and human-readable name.
                             The value is the name of Variable.
        """
        super(GraphWrapper, self).__init__()
        self.program = Program() if program is None else program
        self.persistables = {}
        self.teacher_persistables = {}
        for var in self.program.list_vars():
            if var.persistable:
                self.persistables[var.name] = var
        self.compiled_graph = None
        # None (the default) and an empty sequence both mean "no nodes".
        self.in_nodes = OrderedDict([] if in_nodes is None else in_nodes)
        self.out_nodes = OrderedDict([] if out_nodes is None else out_nodes)
        self._attrs = OrderedDict()

    def all_parameters(self):
        """
        Get all the parameters in this graph.

        Returns:
            list<VarWrapper>: A list of VarWrapper instances.
        """
        params = []
        for block in self.program.blocks:
            for param in block.all_parameters():
                params.append(VarWrapper(param, self))
        return params

    def is_parameter(self, var):
        """
        Whether the given variable is parameter.

        Args:
            var(VarWrapper): The given variable.
        """
        return isinstance(var._var, Parameter)

    def is_persistable(self, var):
        """
        Whether the given variable is persistable.

        Args:
            var(VarWrapper): The given variable.
        """
        return var._var.persistable

    def compile(self, for_parallel=True, for_test=False, mem_opt=False):
        """
        Compile the program in this wrapper to framework.CompiledProgram for next running.
        This function must be called if the program is modified.

        Args:
            for_parallel(bool): Whether the program to run in data parallel way. default: True.
            for_test(bool): Whether the compiled program is used for test.
                            When False, out_nodes['loss'] is used as loss name.
            mem_opt(bool): Value assigned to both enable_inplace and
                           memory_optimize of the build strategy. default: False.
        """
        target = self.program
        if for_test:
            loss = None
        else:
            loss = self.out_nodes['loss']
        if for_parallel:
            # disable memory optimize for stable training
            build_strategy = compiler.BuildStrategy()
            build_strategy.enable_inplace = mem_opt
            build_strategy.memory_optimize = mem_opt
            build_strategy.fuse_all_reduce_ops = False
            # build_strategy.async_mode = False
            self.compiled_graph = compiler.CompiledProgram(
                target).with_data_parallel(
                    loss_name=loss, build_strategy=build_strategy)
        else:
            self.compiled_graph = compiler.CompiledProgram(target)

    def ops(self):
        """
        Return all operator nodes included in the graph as a list.
        """
        ops = []
        for block in self.program.blocks:
            for op in block.ops:
                ops.append(OpWrapper(op, self))
        return ops

    def vars(self):
        """
        Get all the variables.
        """
        return [VarWrapper(var, self) for var in self.program.list_vars()]

    def var(self, name):
        """
        Get the variable by variable name from the global block.
        """
        return VarWrapper(self.program.global_block().var(name), self)

    def clone(self, for_test=False):
        """
        Clone a new graph from current graph.

        Args:
            for_test(bool): Forwarded to Program.clone. default: False.
        Returns:
            (GraphWrapper): The wrapper of a new graph.
        """
        return GraphWrapper(
            self.program.clone(for_test),
            copy.deepcopy(self.in_nodes), copy.deepcopy(self.out_nodes))

    def merge(self, graph):
        """
        Merge a graph into current graph.

        Variables of the given graph are cloned into the global block of
        the current program (as non-persistable) and its operators are
        appended after the existing ones. Persistable variables of the
        given graph are recorded in self.teacher_persistables.

        Args:
            graph(GraphWrapper): The graph to be merged by current graph.
        """
        for var in graph.program.list_vars():
            if var.persistable:
                self.teacher_persistables[var.name] = var
            new_var = self.program.global_block()._clone_variable(
                var, force_persistable=False)
            new_var.stop_gradient = var.stop_gradient
            # TODO: parameters should be cloned
        for op in graph.ops():
            op = op._op
            inputs = {}
            outputs = {}
            attrs = {}
            for input_name in op.input_names:
                inputs[input_name] = [
                    self.var(in_var_name)._var
                    for in_var_name in op.input(input_name)
                ]
            for output_name in op.output_names:
                outputs[output_name] = [
                    self.var(out_var_name)._var
                    for out_var_name in op.output(output_name)
                ]
            for attr_name in op.attr_names:
                attrs[attr_name] = op.attr(attr_name)
            self.program.global_block().append_op(
                type=op.type, inputs=inputs, outputs=outputs, attrs=attrs)

    def program(self):
        """
        Get the program in current wrapper.

        NOTE(review): __init__ assigns an instance attribute that is also
        named ``program``, which shadows this method on every instance, so
        ``graph.program`` yields the Program itself and this method cannot
        be reached through an instance. Kept only for compatibility.
        """
        return self.program

    def pre_ops(self, op):
        """
        Get all the previous operators of target operator.

        An operator is appended once per input variable of `op` that it
        produces, so the result may contain duplicates.

        Args:
            op(OpWrapper): Target operator.
        Returns:
            list<OpWrapper>: A list of operators.
        """
        ops = []
        for p in self.ops():
            for in_var in op.all_inputs():
                if in_var in p.all_outputs():
                    ops.append(p)
        return ops

    def next_ops(self, op):
        """
        Get all the next operators of target operator.

        An operator is appended once per output variable of `op` that it
        consumes, so the result may contain duplicates.

        Args:
            op(OpWrapper): Target operator.
        Returns:
            list<OpWrapper>: A list of operators.
        """
        ops = []
        for p in self.ops():
            for out_var in op.all_outputs():
                if out_var in p.all_inputs():
                    ops.append(p)
        return ops

    def get_param_by_op(self, op):
        """
        Get the parameters used by target operator.

        Asserts that the operator has at least one Parameter input.
        """
        assert isinstance(op, OpWrapper)
        params = []
        for var in op.all_inputs():
            if isinstance(var._var, Parameter):
                params.append(var)
        assert len(params) > 0
        return params

    def numel_params(self):
        """
        Get the number of elements in all parameters.
        """
        ret = 0
        for param in self.all_parameters():
            # np.product was removed in NumPy 2.0; np.prod is the same function.
            ret += np.prod(param.shape())
        return ret

    def get_optimize_graph(self, optimizer, place, scope,
                           no_grad_var_names=None):
        """
        Get a new graph for training by appending some backward operators and optimization operators.

        Args:
            optimizer: The optimizer used to generate training graph.
            place: The place to run the graph.
            scope: The scope used to run the graph. Some new variable will be added into this scope.
            no_grad_var_names(list<str>): Names of variables that should be ignored while computing gradients.
                                          None means an empty list. default: None.
        Returns:
            (GraphWrapper): The wrapper of new graph with backward ops and optimization ops,
                            or None when out_nodes has neither 'loss' nor 'cost'.
        """
        if no_grad_var_names is None:
            no_grad_var_names = []
        graph = self.clone()
        startup_program = Program()
        with program_guard(
                main_program=graph.program, startup_program=startup_program):
            target_name = None
            if 'loss' in graph.out_nodes:
                target_name = graph.out_nodes['loss']
            elif 'cost' in graph.out_nodes:
                target_name = graph.out_nodes['cost']
            else:
                return None
            target = graph.var(target_name)._var
            # The learning rate variable may be created in other program.
            # Update information in optimizer to make
            # learning rate variable being accessible in current program.
            if isinstance(optimizer._learning_rate, Variable):
                optimizer._learning_rate_map[
                    graph.program] = optimizer._learning_rate
            optimizer.minimize(target, no_grad_set=no_grad_var_names)

        exe = Executor(place)
        exe.run(program=startup_program, scope=scope)
        return graph

    def flops(self, only_conv=False):
        """
        Get the flops of current graph.

        Counted operator types: conv2d, depthwise_conv2d and, unless
        only_conv is set, pool2d, mul, relu, sigmoid and batch_norm.
        A leading dimension of -1 is counted as 1 for mul and the
        activation/batch_norm ops.

        Args:
            only_conv: Only calculating the conv layers. default: False.
        Returns:
            int|float: The flops of current graph.
        """
        flops = 0
        for op in self.ops():
            if op.type() in ['conv2d', 'depthwise_conv2d']:
                filter_shape = op.inputs("Filter")[0].shape()
                output_shape = op.outputs("Output")[0].shape()
                c_out, c_in, k_h, k_w = filter_shape
                _, _, h_out, w_out = output_shape
                groups = op.attr("groups")
                kernel_ops = k_h * k_w * (c_in / groups)
                if len(op.inputs("Bias")) > 0:
                    with_bias = 1
                else:
                    with_bias = 0
                flops += 2 * h_out * w_out * c_out * (kernel_ops + with_bias)
            elif op.type() == 'pool2d' and not only_conv:
                output_shape = op.outputs("Out")[0].shape()
                _, c_out, h_out, w_out = output_shape
                k_size = op.attr("ksize")
                flops += h_out * w_out * c_out * (k_size[0]**2)

            elif op.type() == 'mul' and not only_conv:
                x_shape = list(op.inputs("X")[0].shape())
                y_shape = op.inputs("Y")[0].shape()
                if x_shape[0] == -1:
                    x_shape[0] = 1
                flops += 2 * x_shape[0] * x_shape[1] * y_shape[1]

            elif op.type() in ['relu', 'sigmoid', 'batch_norm'
                               ] and not only_conv:
                input_shape = list(op.inputs("X")[0].shape())
                if input_shape[0] == -1:
                    input_shape[0] = 1
                # np.product was removed in NumPy 2.0; np.prod is the same function.
                flops += np.prod(input_shape)

        return flops

    def save_model(self, path, exe):
        """
        Save network and parameters into file which can be load by load_inference_model api.

        Inputs are the values of in_nodes, outputs are the variables named
        by out_nodes; both must be non-empty.

        Args:
            path(str): The path to save the persistables.
            exe(framework.Executor): The executor used to save the persistables.
        """
        out_vars = [
            self.var(var_name)._var for var_name in self.out_nodes.values()
        ]
        in_vars = list(self.in_nodes.values())
        assert (len(in_vars) > 0)
        assert (len(out_vars) > 0)

        io.save_inference_model(
            path,
            in_vars,
            out_vars,
            exe.exe,
            model_filename="__model__",
            params_filename="__params__",
            main_program=self.program.clone(),
            export_for_deployment=True)

    def save_infer_model(self, path, exe, in_out, program_only=False):
        """
        Save network and parameters into file which can be load by load_inference_model api.

        Args:
            path(str): The path to save the persistables.
            exe(framework.Executor): The executor used to save the persistables.
            in_out(tuple|list): in_out[0] is a list of input nodes' names
            and in_out[1] is a list of output nodes' names.
            program_only(bool): Whether to save program only.
        """
        out_vars = [self.var(var_name)._var for var_name in in_out[1]]
        in_vars = list(in_out[0])
        assert (len(in_vars) > 0)
        assert (len(out_vars) > 0)
        io.save_inference_model(
            path,
            in_vars,
            out_vars,
            exe.exe,
            model_filename="__model__.infer",
            params_filename="__params__",
            program_only=program_only,
            main_program=self.program.clone(),
            export_for_deployment=True)

    def save_persistables(self, path, exe):
        """
        Save all the persistable variables into file.

        Variables whose name contains 'reader' or 'double_buffer', and
        variables recorded in teacher_persistables, are not saved.

        Args:
            path(str): The path to save the persistables.
            exe(framework.Executor): The executor used to save the persistables.
        """
        # update persistables from program
        for var in self.program.list_vars():
            if var.persistable and var.name not in self.persistables:
                self.persistables[var.name] = var
        persistables = []
        for var in self.persistables:
            if 'reader' not in var and 'double_buffer' not in var and var not in self.teacher_persistables:
                persistables.append(self.persistables[var])

        io.save_vars(exe.exe, path, vars=persistables)

    def load_persistables(self, path, exe):
        """
        Load the persistable variables from file.

        Only variables that have a file named after them under `path` are
        loaded; names containing 'reader' or 'double_buffer' are skipped.

        Args:
            path(str): The path to load the persistables.
            exe(framework.Executor): The executor used to load the persistables.
        """

        def if_exist(var):
            return os.path.exists(os.path.join(path, var.name))

        persistables = []
        for var in self.persistables:
            if 'reader' not in var and 'double_buffer' not in var:
                persistables.append(self.persistables[var])
        io.load_vars(exe.exe, path, vars=persistables, predicate=if_exist)

    def update_param_shape(self, scope):
        """
        Update the shape of parameters in the graph according to tensors in scope.
        It is used after loading pruned parameters from file.
        """
        for param in self.all_parameters():
            tensor_shape = np.array(scope.find_var(param.name()).get_tensor(
            )).shape
            param.set_shape(tensor_shape)

    def infer_shape(self):
        """
        Re-run shape inference for every operator except 'conditional_block'.
        It is used after loading pruned parameters from file.
        """
        for op in self.ops():
            if op.type() != 'conditional_block':
                op._op.desc.infer_shape(op._op.block.desc)

    def update_groups_of_conv(self):
        """
        Update the groups of depthwise convolution layers (and their grad
        ops) to the first dimension of their current filters.
        """
        for op in self.ops():
            if op.type() == 'depthwise_conv2d' or op.type(
            ) == 'depthwise_conv2d_grad':
                op.set_attr('groups', op.inputs('Filter')[0].shape()[0])
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import light_nas_strategy
from .light_nas_strategy import *
from . import controller_server
from .controller_server import *
from . import search_agent
from .search_agent import *
from . import search_space
from .search_space import *
from . import lock
from .lock import *
__all__ = light_nas_strategy.__all__
__all__ += controller_server.__all__
__all__ += search_agent.__all__
__all__ += search_space.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import socket
from threading import Thread
from ....log_helper import get_logger
__all__ = ['ControllerServer']
_logger = get_logger(
__name__,
logging.INFO,
fmt='ControllerServer-%(asctime)s-%(levelname)s: %(message)s')
class ControllerServer(object):
    """
    The controller wrapper with a socket server to handle the request of search agent.
    """

    def __init__(self,
                 controller=None,
                 address=('', 0),
                 max_client_num=100,
                 search_steps=None,
                 key=None):
        """
        Args:
            controller(slim.searcher.Controller): The controller used to generate tokens.
            address(tuple): The address of current server binding with format (ip, port). Default: ('', 0).
                            which means setting ip automatically
            max_client_num(int): The maximum number of clients connecting to current server simultaneously. Default: 100.
            search_steps(int): The total steps of searching. None means never stopping. Default: None
            key(str): The first tab-separated field an update message must
                      carry to be accepted; other messages are treated as
                      noise. Default: None.
        """
        self._controller = controller
        self._address = address
        self._max_client_num = max_client_num
        self._search_steps = search_steps
        self._closed = False
        self._port = address[1]
        self._ip = address[0]
        self._key = key

    def start(self):
        """
        Bind and listen on the configured address, then serve requests in a
        new thread.

        Returns:
            str: The string form of the started thread object.
        """
        self._socket_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._socket_server.bind(self._address)
        self._socket_server.listen(self._max_client_num)
        # Read back the address actually bound (the port may have been 0).
        self._port = self._socket_server.getsockname()[1]
        self._ip = self._socket_server.getsockname()[0]
        _logger.info("listen on: [{}:{}]".format(self._ip, self._port))
        thread = Thread(target=self.run)
        thread.start()
        return str(thread)

    def close(self):
        """Close the server."""
        # Only a flag: it is checked before each accept() call in run().
        self._closed = True

    def port(self):
        """Get the port."""
        return self._port

    def ip(self):
        """Get the ip."""
        return self._ip

    def run(self):
        """
        Serve requests until the search steps are exhausted or close() is called.

        Every accepted connection is closed after one request, including
        when the request is rejected as noise or handling raises; the
        listening socket is closed when the loop ends for any reason.
        """
        _logger.info("Controller Server run...")
        try:
            while ((self._search_steps is None) or
                   (self._controller._iter <
                    (self._search_steps))) and not self._closed:
                conn, addr = self._socket_server.accept()
                try:
                    self._handle_request(conn, addr)
                finally:
                    conn.close()
        finally:
            self._socket_server.close()
        _logger.info("server closed!")

    def _handle_request(self, conn, addr):
        """Answer one request on `conn`; the caller closes the connection."""
        message = conn.recv(1024).decode()
        if message.strip("\n") == "next_tokens":
            tokens = self._controller.next_tokens()
            tokens = ",".join([str(token) for token in tokens])
            conn.send(tokens.encode())
            return
        _logger.info("recv message from {}: [{}]".format(addr, message))
        # Expected update format: "<key>\t<tokens>\t<reward>".
        messages = message.strip('\n').split("\t")
        if (len(messages) < 3) or (messages[0] != self._key):
            _logger.info("recv noise from {}: [{}]".format(addr, message))
            return
        tokens = messages[1]
        reward = messages[2]
        tokens = [int(token) for token in tokens.split(",")]
        self._controller.update(tokens, float(reward))
        tokens = self._controller.next_tokens()
        tokens = ",".join([str(token) for token in tokens])
        conn.send(tokens.encode())
        _logger.info("send message to {}: [{}]".format(addr, tokens))
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..core.strategy import Strategy
from ..graph import GraphWrapper
from .controller_server import ControllerServer
from .search_agent import SearchAgent
from ....executor import Executor
from ....log_helper import get_logger
import re
import logging
import functools
import socket
from .lock import lock, unlock
__all__ = ['LightNASStrategy']
_logger = get_logger(
__name__,
logging.INFO,
fmt='LightNASStrategy-%(asctime)s-%(levelname)s: %(message)s')
class LightNASStrategy(Strategy):
"""
Light-NAS search strategy.
"""
def __init__(self,
             controller=None,
             end_epoch=1000,
             target_flops=629145600,
             target_latency=0,
             retrain_epoch=1,
             metric_name='top1_acc',
             server_ip=None,
             server_port=0,
             is_server=False,
             max_client_num=100,
             search_steps=None,
             key="light-nas"):
    """
    Args:
        controller(searcher.Controller): The searching controller. Default: None.
        end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. Default: 1000
        target_flops(int): The constraint of FLOPS. Default: 629145600.
        target_latency(float): The constraint of latency. Default: 0.
        retrain_epoch(int): The number of training epochs before evaluating structure generated by controller. Default: 1.
        metric_name(str): The metric used to evaluate the model.
                     It should be one of keys in out_nodes of graph wrapper. Default: 'top1_acc'
        server_ip(str): The ip that controller server listens on. None means getting the ip automatically. Default: None.
        server_port(int): The port that controller server listens on. 0 means getting usable port automatically. Default: 0.
        is_server(bool): Whether current host is controller server. Default: False.
        max_client_num(int): The maximum number of clients that connect to controller server concurrently. Default: 100.
        search_steps(int): The total steps of searching. Default: None.
        key(str): The key used to identify legal agent for controller server. Default: "light-nas"
    """
    # Epoch window of the strategy.
    self.start_epoch = 0
    self.end_epoch = end_epoch

    # Search constraints and evaluation settings.
    self._max_flops = target_flops
    self._max_latency = target_latency
    self._metric_name = metric_name
    self._controller = controller
    self._retrain_epoch = retrain_epoch
    self._search_steps = search_steps
    self._max_try_times = 100

    # Controller server settings.
    self._is_server = is_server
    self._server_port = server_port
    self._max_client_num = max_client_num
    self._key = key
    self._server_ip = server_ip
    if server_ip is None:
        self._server_ip = self._get_host_ip()
def _get_host_ip(self):
return socket.gethostbyname(socket.gethostname())
def on_compression_begin(self, context):
self._current_tokens = context.search_space.init_tokens()
self._controller.reset(context.search_space.range_table(),
self._current_tokens, None)
# create controller server
if self._is_server:
open("./slim_LightNASStrategy_controller_server.socket",
'a').close()
socket_file = open(
"./slim_LightNASStrategy_controller_server.socket", 'r+')
lock(socket_file)
tid = socket_file.readline()
if tid == '':
_logger.info("start controller server...")
self._server = ControllerServer(
controller=self._controller,
address=(self._server_ip, self._server_port),
max_client_num=self._max_client_num,
search_steps=self._search_steps,
key=self._key)
tid = self._server.start()
self._server_port = self._server.port()
socket_file.write(tid)
_logger.info("started controller server...")
unlock(socket_file)
socket_file.close()
_logger.info("self._server_ip: {}; self._server_port: {}".format(
self._server_ip, self._server_port))
# create client
self._search_agent = SearchAgent(
self._server_ip, self._server_port, key=self._key)
def __getstate__(self):
"""Socket can't be pickled."""
d = {}
for key in self.__dict__:
if key not in ["_search_agent", "_server"]:
d[key] = self.__dict__[key]
return d
def on_epoch_begin(self, context):
if context.epoch_id >= self.start_epoch and context.epoch_id <= self.end_epoch and (
self._retrain_epoch == 0 or
(context.epoch_id - self.start_epoch) % self._retrain_epoch == 0):
_logger.info("light nas strategy on_epoch_begin")
min_flops = -1
for _ in range(self._max_try_times):
startup_p, train_p, test_p, _, _, train_reader, test_reader = context.search_space.create_net(
self._current_tokens)
context.eval_graph.program = test_p
flops = context.eval_graph.flops()
if min_flops == -1:
min_flops = flops
min_tokens = self._current_tokens[:]
else:
if flops < min_flops:
min_tokens = self._current_tokens[:]
if self._max_latency > 0:
latency = context.search_space.get_model_latency(test_p)
_logger.info("try [{}] with latency {} flops {}".format(
self._current_tokens, latency, flops))
else:
_logger.info("try [{}] with flops {}".format(
self._current_tokens, flops))
if flops > self._max_flops or (self._max_latency > 0 and
latency > self._max_latency):
self._current_tokens = self._controller.next_tokens(
min_tokens)
else:
break
context.train_reader = train_reader
context.eval_reader = test_reader
exe = Executor(context.place)
exe.run(startup_p)
context.optimize_graph.program = train_p
context.optimize_graph.compile()
context.skip_training = (self._retrain_epoch == 0)
def on_epoch_end(self, context):
if context.epoch_id >= self.start_epoch and context.epoch_id < self.end_epoch and (
self._retrain_epoch == 0 or
(context.epoch_id - self.start_epoch + 1
) % self._retrain_epoch == 0):
self._current_reward = context.eval_results[self._metric_name][-1]
flops = context.eval_graph.flops()
if flops > self._max_flops:
self._current_reward = 0.0
if self._max_latency > 0:
test_p = context.search_space.create_net(self._current_tokens)[
2]
latency = context.search_space.get_model_latency(test_p)
if latency > self._max_latency:
self._current_reward = 0.0
_logger.info("reward: {}; latency: {}; flops: {}; tokens: {}".
format(self._current_reward, latency, flops,
self._current_tokens))
else:
_logger.info("reward: {}; flops: {}; tokens: {}".format(
self._current_reward, flops, self._current_tokens))
self._current_tokens = self._search_agent.update(
self._current_tokens, self._current_reward)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
# Public API of this module. The name must be the lowercase ``__all__``;
# any other spelling is ignored by ``from <module> import *``.
__all__ = ['lock', 'unlock']

if os.name == 'nt':

    def lock(file):
        """Lock the file. Not implemented on Windows.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError('Windows is not supported.')

    def unlock(file):
        """Unlock the file. Not implemented on Windows.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError('Windows is not supported.')

elif os.name == 'posix':
    from fcntl import flock, LOCK_EX, LOCK_UN

    def lock(file):
        """Lock the file in local file system.

        Takes an exclusive ``flock`` on the descriptor of ``file``.

        Args:
            file: An open file object providing ``fileno()``.
        """
        flock(file.fileno(), LOCK_EX)

    def unlock(file):
        """Unlock the file in local file system.

        Args:
            file: An open file object providing ``fileno()``.
        """
        flock(file.fileno(), LOCK_UN)

else:
    raise RuntimeError("File Locker only support NT and Posix platforms!")
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import socket
from ....log_helper import get_logger
# Names exported by ``from <this module> import *``.
__all__ = ['SearchAgent']
# Module-level logger obtained from get_logger with level logging.INFO and a
# "<time>-<level>: <message>" record format.
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class SearchAgent(object):
    """
    Search agent.

    Client side of the controller server: every request opens a TCP
    connection to ``(server_ip, server_port)``, sends one text message and
    reads one comma-separated list of integer tokens as the reply.
    """

    def __init__(self, server_ip=None, server_port=None, key=None):
        """
        Args:
            server_ip(str): The ip that controller server listens on. Default: None.
            server_port(int): The port that controller server listens on. Default: None.
            key(str): The key used to identify legal agent for controller server. Default: None.
        """
        self.server_ip = server_ip
        self.server_port = server_port
        # Kept only so the public attribute stays available; requests never
        # use this socket, each one opens its own short-lived connection.
        self.socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._key = key

    def _request(self, message):
        """
        Send ``message`` to the controller server and parse the reply.

        Args:
            message(str): The text to send.
        Returns:
            list<int>: The tokens contained in the reply.
        """
        socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            socket_client.connect((self.server_ip, self.server_port))
            # sendall keeps sending until the whole message is transmitted;
            # a plain send may stop after only a part of it.
            socket_client.sendall(message.encode())
            # NOTE(review): a single recv of at most 1024 bytes is kept from
            # the original protocol; longer replies would be truncated.
            reply = socket_client.recv(1024).decode()
        finally:
            # Release the connection even when the request failed.
            socket_client.close()
        return [int(token) for token in reply.strip("\n").split(",")]

    def update(self, tokens, reward):
        """
        Update the controller according to latest tokens and reward.
        Args:
            tokens(list<int>): The tokens generated in last step.
            reward(float): The reward of tokens.
        Returns:
            list<int>: The tokens sent back by the controller server.
        """
        tokens = ",".join([str(token) for token in tokens])
        return self._request("{}\t{}\t{}".format(self._key, tokens, reward))

    def next_tokens(self):
        """
        Get next tokens.
        Returns:
            list<int>: The tokens sent back by the controller server.
        """
        return self._request("next_tokens")
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The search space used to search neural architecture"""
# Names exported by ``from <this module> import *``.
__all__ = ['SearchSpace']
class SearchSpace(object):
    """Base class describing a search space for neural architecture search.

    Every method except the constructor raises ``NotImplementedError`` here
    and is meant to be overridden by a concrete search space.
    """

    def __init__(self, *args, **kwargs):
        """Accept any arguments and ignore them."""

    def init_tokens(self):
        """Return the initial tokens of the search space.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError('Abstract method.')

    def range_table(self):
        """Return the range table of the search space.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError('Abstract method.')

    def create_net(self, tokens):
        """Build the training and evaluation networks described by tokens.

        Args:
            tokens(list<int>): The tokens which represent a network.
        Return:
            (tuple): startup_program, train_program, evaluation_program, train_metrics, test_metrics
        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError('Abstract method.')

    def get_model_latency(self, program):
        """Return the latency of the model held by program.

        Args:
            program(Program): The program to get latency.
        Return:
            (float): model latency.
        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError('Abstract method.')
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import pruner
from .pruner import *
from . import prune_strategy
from .prune_strategy import *
from . import auto_prune_strategy
from .auto_prune_strategy import *
# Build a new list rather than binding ``__all__`` to ``pruner.__all__`` and
# extending it with ``+=``: the in-place extension would also add the names of
# the other submodules to ``pruner.__all__`` itself.
__all__ = (pruner.__all__ + prune_strategy.__all__ +
           auto_prune_strategy.__all__)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .prune_strategy import PruneStrategy
import re
import logging
import functools
import copy
from ....log_helper import get_logger
# Names exported by ``from <this module> import *``.
__all__ = ['AutoPruneStrategy']
# Module-level logger obtained from get_logger with level logging.INFO and a
# "<time>-<level>: <message>" record format.
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class AutoPruneStrategy(PruneStrategy):
    """
    Automatic pruning strategy.

    A controller proposes tokens, one integer per pruned parameter, where a
    token ``t`` stands for the pruned ratio ``t * 0.01``. In every search
    step the parameters are pruned by the current tokens, the model is
    evaluated, and the metric is fed back to the controller as the reward.
    """

    def __init__(self,
                 pruner=None,
                 controller=None,
                 start_epoch=0,
                 end_epoch=10,
                 min_ratio=0.5,
                 max_ratio=0.7,
                 metric_name='top1_acc',
                 pruned_params='conv.*_weights',
                 retrain_epoch=0,
                 uniform_range=None,
                 init_tokens=None):
        """
        Args:
            pruner(slim.Pruner): The pruner used to prune the parameters. Default: None.
            controller(searcher.Controller): The searching controller. Default: None.
            start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. Default: 0
            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. Default: 10
            min_ratio(float): The minimum ratio of FLOPs that tokens must prune to be accepted. Default: 0.5
            max_ratio(float): The maximum ratio of FLOPs that tokens may prune to be accepted. Default: 0.7
            metric_name(str): The metric used to evaluate the model.
                It should be one of keys in out_nodes of graph wrapper. Default: 'top1_acc'
            pruned_params(str): The pattern str to match the parameter names to be pruned. Default: 'conv.*_weights'
            retrain_epoch(int): The training epochs in each searching step. Default: 0
            uniform_range(int): The token range in each position of tokens generated by controller. None means getting the range automatically. Default: None.
            init_tokens(list<int>): The initial tokens. None means getting the initial tokens automatically. Default: None.
        """
        super(AutoPruneStrategy, self).__init__(pruner, start_epoch, end_epoch,
                                                0.0, metric_name, pruned_params)
        self._max_ratio = max_ratio
        self._min_ratio = min_ratio
        self._controller = controller
        self._metric_name = metric_name
        self._pruned_param_names = []
        self._retrain_epoch = retrain_epoch
        self._uniform_range = uniform_range
        self._init_tokens = init_tokens
        self._current_tokens = None
        # Backups filled by on_epoch_begin and consumed by on_epoch_end;
        # created here so on_epoch_end never meets a missing attribute.
        self._param_backup = {}
        self._param_shape_backup = {}

    def on_compression_begin(self, context):
        """
        Prepare some information for searching strategy.
        step 1: Find all the parameters to be pruned.
        step 2: Get initial tokens and setup controller.
        """
        for param in context.eval_graph.all_parameters():
            if re.match(self.pruned_params, param.name()):
                self._pruned_param_names.append(param.name())

        if self._init_tokens is not None:
            self._current_tokens = self._init_tokens
        else:
            self._current_tokens = self._get_init_tokens(context)

        if self._uniform_range is not None:
            self._range_table = [round(self._uniform_range, 2) / 0.01] * len(
                self._pruned_param_names)
        else:
            # Without an explicit range the initial tokens double as the
            # range table; copy them so later token changes do not alter it.
            self._range_table = copy.deepcopy(self._current_tokens)
        _logger.info('init tokens: {}'.format(self._current_tokens))
        _logger.info("range_table: {}".format(self._range_table))
        constrain_func = functools.partial(
            self._constrain_func, context=context)

        self._controller.reset(self._range_table, self._current_tokens,
                               constrain_func)

    def _constrain_func(self, tokens, context=None):
        """Check whether the tokens meet constraint.

        The evaluation graph is pruned by the tokens (graph only, no tensor
        is modified), the remaining FLOPs are measured, and the parameter
        shapes are restored.

        Returns:
            bool: True if the pruned FLOPs ratio lies within
                [min_ratio, max_ratio].
        """
        ori_flops = context.eval_graph.flops()
        ratios = self._tokens_to_ratios(tokens)
        params = self._pruned_param_names
        param_shape_backup = {}
        self._prune_parameters(
            context.eval_graph,
            context.scope,
            params,
            ratios,
            context.place,
            only_graph=True,
            param_shape_backup=param_shape_backup)
        context.eval_graph.update_groups_of_conv()
        flops = context.eval_graph.flops()
        # Undo the shape changes made by the trial pruning.
        for param in param_shape_backup.keys():
            context.eval_graph.var(param).set_shape(param_shape_backup[param])
        flops_ratio = (1 - float(flops) / ori_flops)
        if flops_ratio >= self._min_ratio and flops_ratio <= self._max_ratio:
            _logger.info("Success try [{}]; flops: -{}".format(tokens,
                                                               flops_ratio))
            return True
        else:
            _logger.info("Failed try [{}]; flops: -{}".format(tokens,
                                                              flops_ratio))
            return False

    def _get_init_tokens(self, context):
        """Get initial tokens.
        """
        ratios = self._get_uniform_ratios(context)
        _logger.info('Get init ratios: {}'.format(
            [round(r, 2) for r in ratios]))
        return self._ratios_to_tokens(ratios)

    def _ratios_to_tokens(self, ratios):
        """Convert pruned ratios to tokens.
        """
        return [int(ratio / 0.01) for ratio in ratios]

    def _tokens_to_ratios(self, tokens):
        """Convert tokens to pruned ratios.
        """
        return [token * 0.01 for token in tokens]

    def _get_uniform_ratios(self, context):
        """
        Search a group of uniform ratios.

        Bisects one ratio shared by all pruned parameters until the pruned
        FLOPs ratio is within 0.01 of the middle of [min_ratio, max_ratio].

        Returns:
            list<float>: The ratio repeated once per pruned parameter.
        """
        min_ratio = 0.
        max_ratio = 1.
        target = (self._min_ratio + self._max_ratio) / 2
        flops = context.eval_graph.flops()
        model_size = context.eval_graph.numel_params()
        ratios = None
        while min_ratio < max_ratio:
            ratio = (max_ratio + min_ratio) / 2
            if ratios is not None and ratio in (min_ratio, max_ratio):
                # The interval cannot be halved any further, so more
                # iterations would repeat the same trial forever; keep the
                # last ratios that were tried.
                break
            ratios = [ratio] * len(self._pruned_param_names)
            param_shape_backup = {}
            self._prune_parameters(
                context.eval_graph,
                context.scope,
                self._pruned_param_names,
                ratios,
                context.place,
                only_graph=True,
                param_shape_backup=param_shape_backup)

            pruned_flops = 1 - (float(context.eval_graph.flops()) / flops)
            # Computed but not used by the search below.
            pruned_size = 1 - (float(context.eval_graph.numel_params()) /
                               model_size)
            # Undo the shape changes made by the trial pruning.
            for param in param_shape_backup.keys():
                context.eval_graph.var(param).set_shape(param_shape_backup[
                    param])

            if abs(pruned_flops - target) < 1e-2:
                break
            if pruned_flops > target:
                max_ratio = ratio
            else:
                min_ratio = ratio
        _logger.info('Get ratios: {}'.format([round(r, 2) for r in ratios]))
        return ratios

    def on_epoch_begin(self, context):
        """
        step 1: Get a new tokens from controller.
        step 2: Pruning eval_graph and optimize_program by tokens
        """
        if context.epoch_id >= self.start_epoch and context.epoch_id <= self.end_epoch and (
                self._retrain_epoch == 0 or
            (context.epoch_id - self.start_epoch) % self._retrain_epoch == 0):
            _logger.info("on_epoch_begin")
            params = self._pruned_param_names
            ratios = self._tokens_to_ratios(self._current_tokens)

            # Values and shapes are backed up so on_epoch_end can restore
            # the unpruned model before the next candidate is tried.
            self._param_shape_backup = {}
            self._param_backup = {}
            self._prune_parameters(
                context.optimize_graph,
                context.scope,
                params,
                ratios,
                context.place,
                param_backup=self._param_backup,
                param_shape_backup=self._param_shape_backup)
            self._prune_graph(context.eval_graph, context.optimize_graph)
            context.optimize_graph.update_groups_of_conv()
            context.eval_graph.update_groups_of_conv()
            context.optimize_graph.compile(
                mem_opt=False)  # to update the compiled program
            context.skip_training = (self._retrain_epoch == 0)

    def on_epoch_end(self, context):
        """
        step 1: Get reward of current tokens and update controller.
        step 2: Restore eval_graph and optimize_graph
        """
        if context.epoch_id >= self.start_epoch and context.epoch_id < self.end_epoch and (
                self._retrain_epoch == 0 or
            (context.epoch_id - self.start_epoch + 1
             ) % self._retrain_epoch == 0):
            _logger.info("on_epoch_end")
            reward = context.eval_results[self._metric_name][-1]
            self._controller.update(self._current_tokens, reward)

            self._current_tokens = self._controller.next_tokens()
            # restore pruned parameters
            for param_name in self._param_backup.keys():
                param_t = context.scope.find_var(param_name).get_tensor()
                param_t.set(self._param_backup[param_name], context.place)
            self._param_backup = {}
            # restore shape of parameters
            for param in self._param_shape_backup.keys():
                context.optimize_graph.var(param).set_shape(
                    self._param_shape_backup[param])
            self._param_shape_backup = {}
            self._prune_graph(context.eval_graph, context.optimize_graph)
            context.optimize_graph.update_groups_of_conv()
            context.eval_graph.update_groups_of_conv()
            context.optimize_graph.compile(
                mem_opt=False)  # to update the compiled program

        elif context.epoch_id == self.end_epoch:  # restore graph for final training
            # restore pruned parameters
            for param_name in self._param_backup.keys():
                param_t = context.scope.find_var(param_name).get_tensor()
                # The backup lives in ``_param_backup`` (leading underscore).
                param_t.set(self._param_backup[param_name], context.place)
            # restore shape of parameters
            for param in self._param_shape_backup.keys():
                context.eval_graph.var(param).set_shape(
                    self._param_shape_backup[param])
                context.optimize_graph.var(param).set_shape(
                    self._param_shape_backup[param])

            context.optimize_graph.update_groups_of_conv()
            context.eval_graph.update_groups_of_conv()

            # NOTE(review): _get_prune_ratios is not defined in this class;
            # presumably it is provided by a base class - confirm.
            params, ratios = self._get_prune_ratios(
                self._controller._best_tokens)
            self._prune_parameters(context.optimize_graph, context.scope,
                                   params, ratios, context.place)

            self._prune_graph(context.eval_graph, context.optimize_graph)
            context.optimize_graph.update_groups_of_conv()
            context.eval_graph.update_groups_of_conv()
            context.optimize_graph.compile(
                mem_opt=True)  # to update the compiled program

            context.skip_training = False
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..core.strategy import Strategy
from ..graph import VarWrapper, OpWrapper, GraphWrapper
from ....framework import Program, program_guard, Parameter
from ....log_helper import get_logger
from .... import layers
import prettytable as pt
import numpy as np
from scipy.optimize import leastsq
import copy
import re
import os
import pickle
import logging
import sys
# Names exported by ``from <this module> import *``.
__all__ = ['SensitivePruneStrategy', 'UniformPruneStrategy', 'PruneStrategy']
# Module-level logger obtained from get_logger with level logging.INFO and a
# "<time>-<level>: <message>" record format.
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class PruneStrategy(Strategy):
"""
The base class of all pruning strategies.
"""
def __init__(self,
             pruner=None,
             start_epoch=0,
             end_epoch=0,
             target_ratio=0.5,
             metric_name=None,
             pruned_params='conv.*_weights'):
    """
    Store the pruning configuration on the strategy.

    Args:
        pruner(slim.Pruner): The pruner used to prune the parameters.
        start_epoch(int): Forwarded to the base strategy. default: 0
        end_epoch(int): Forwarded to the base strategy. default: 0
        target_ratio(float): The flops ratio to be pruned from current model.
        metric_name(str): The metric used to evaluate the model.
            It should be one of keys in out_nodes of graph wrapper.
        pruned_params(str): The pattern str to match the parameter names to be pruned.
    """
    super(PruneStrategy, self).__init__(start_epoch, end_epoch)
    self.pruner, self.target_ratio = pruner, target_ratio
    self.metric_name, self.pruned_params = metric_name, pruned_params
    # Names of the parameters pruned so far; starts empty.
    self.pruned_list = []
def _eval_graph(self, context, sampled_rate=None, cached_id=0):
    """
    Run the evaluation graph and return the mean of the configured metric.

    Args:
        context(slim.core.Context): The context storing all information used to evaluate the current model.
        sampled_rate(float): The sampled rate used to sample partial data for evaluation. None means using all data in eval_reader. default: None.
        cached_id(int): The id of dataset sampled. Evaluations with same cached_id use the same sampled dataset. default: 0.
    Returns:
        The mean, computed with ``np.mean``, of the result whose name equals
        ``self.metric_name``.
    """
    outputs, output_names = context.run_eval_graph(sampled_rate, cached_id)
    # The results are positionally aligned with their names.
    metric_pos = list(output_names).index(self.metric_name)
    return np.mean(outputs[metric_pos])
def _prune_filters_by_ratio(self,
                            scope,
                            params,
                            ratio,
                            place,
                            lazy=False,
                            only_graph=False,
                            param_shape_backup=None,
                            param_backup=None):
    """
    Prune the filters (axis 0) of the given parameters by a ratio.

    The indexes to prune are computed by the pruner from the first
    parameter only and then applied to every parameter in ``params``.

    Args:
        scope(fluid.core.Scope): The scope used to pruning filters.
        params(list<VarWrapper>): A list of filter parameters.
        ratio(float): The ratio to be pruned.
        place(fluid.Place): The device place of filter parameters.
        lazy(bool): True means setting the pruned elements to zero.
                    False means cutting down the pruned elements.
        only_graph(bool): True means only modifying the graph.
                          False means modifying graph and variables in scope.
        param_shape_backup(dict): If given, receives the original shape of
            every parameter not already recorded in it.
        param_backup(dict): If given, receives a copy of the original value
            of every parameter not already recorded in it.
    Returns:
        The pruned indexes computed by the pruner, or None when the first
        parameter has already been pruned on axis 0.
    """
    first_name = params[0].name()
    if first_name in self.pruned_list[0]:
        return
    first_tensor = scope.find_var(first_name).get_tensor()
    pruned_idx = self.pruner.cal_pruned_idx(
        first_name, np.array(first_tensor), ratio, axis=0)
    row_rule = '|----------------------------------------+----+------------------------------+------------------------------|'
    for param in params:
        assert isinstance(param, VarWrapper)
        name = param.name()
        tensor = scope.find_var(name).get_tensor()
        if param_backup is not None and name not in param_backup:
            param_backup[name] = copy.deepcopy(np.array(tensor))
        pruned_value = self.pruner.prune_tensor(
            np.array(tensor), pruned_idx, pruned_axis=0, lazy=lazy)
        if not only_graph:
            tensor.set(pruned_value, place)
        ori_shape = param.shape()
        if param_shape_backup is not None and name not in param_shape_backup:
            param_shape_backup[name] = copy.deepcopy(ori_shape)
        # Only the pruned axis changes in the graph-side shape.
        new_shape = list(ori_shape)
        new_shape[0] = pruned_value.shape[0]
        param.set_shape(new_shape)
        _logger.debug(row_rule)
        _logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format(
            str(name), str(ratio), str(ori_shape), str(param.shape())))
        self.pruned_list[0].append(name)
    return pruned_idx
def _prune_parameter_by_idx(self,
                            scope,
                            params,
                            pruned_idx,
                            pruned_axis,
                            place,
                            lazy=False,
                            only_graph=False,
                            param_shape_backup=None,
                            param_backup=None):
    """
    Prune the given parameters at the given indexes along one axis.

    Nothing is done when the first parameter has already been pruned on
    ``pruned_axis``.

    Args:
        scope(fluid.core.Scope): The scope storing paramaters to be pruned.
        params(list<VarWrapper>): The parameters to be pruned.
        pruned_idx(list): The index of elements to be pruned.
        pruned_axis(int): The pruning axis.
        place(fluid.Place): The device place of filter parameters.
        lazy(bool): True means setting the pruned elements to zero.
                    False means cutting down the pruned elements.
        only_graph(bool): True means only modifying the graph.
                          False means modifying graph and variables in scope.
        param_shape_backup(dict): If given, receives the original shape of
            every parameter not already recorded in it.
        param_backup(dict): If given, receives a copy of the original value
            of every parameter not already recorded in it.
    """
    if params[0].name() in self.pruned_list[pruned_axis]:
        return
    row_rule = '|----------------------------------------+----+------------------------------+------------------------------|'
    for param in params:
        assert isinstance(param, VarWrapper)
        name = param.name()
        tensor = scope.find_var(name).get_tensor()
        if param_backup is not None and name not in param_backup:
            param_backup[name] = copy.deepcopy(np.array(tensor))
        pruned_value = self.pruner.prune_tensor(
            np.array(tensor), pruned_idx, pruned_axis, lazy=lazy)
        if not only_graph:
            tensor.set(pruned_value, place)
        ori_shape = param.shape()
        if param_shape_backup is not None and name not in param_shape_backup:
            param_shape_backup[name] = copy.deepcopy(ori_shape)
        # Only the pruned axis changes in the graph-side shape.
        new_shape = list(ori_shape)
        new_shape[pruned_axis] = pruned_value.shape[pruned_axis]
        param.set_shape(new_shape)
        _logger.debug(row_rule)
        _logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format(
            str(name), str(pruned_axis), str(ori_shape), str(param.shape())))
        self.pruned_list[pruned_axis].append(name)
def _forward_search_related_op(self, graph, param):
    """
    Forward search operators that will be affected by pruning of param.

    Starting from the forward operators that take ``param`` as an input, the
    graph is walked depth-first along ``graph.next_ops``. The walk does not
    continue past a ``mul`` operator, nor past a ``conv2d`` operator that
    does not take ``param`` as an input.

    Args:
        graph(GraphWrapper): The graph to be searched.
        param(VarWrapper): The current pruned parameter.
    Returns:
        list<OpWrapper>: A list of operators, in the order they were first
        visited.
    """
    assert isinstance(param, VarWrapper)
    visited = {op.idx(): False for op in graph.ops()}
    stack = [
        op for op in graph.ops()
        if (not op.is_bwd_op()) and (param in op.all_inputs())
    ]
    visit_path = []
    while stack:
        top_op = stack[-1]
        if not visited[top_op.idx()]:
            visit_path.append(top_op)
            visited[top_op.idx()] = True
        if top_op.type() == "conv2d" and param not in top_op.all_inputs():
            next_ops = None
        elif top_op.type() == "mul":
            next_ops = None
        else:
            next_ops = self._get_next_unvisited_op(graph, visited, top_op)
        if next_ops is None:
            # Nothing left to explore below this operator.
            stack.pop()
        else:
            stack += next_ops
    return visit_path
def _get_next_unvisited_op(self, graph, visited, top_op):
    """
    Get next unvisited adjacent operators of given operators.

    Args:
        graph(GraphWrapper): The graph used to search.
        visited(dict): Maps the id of an operator to whether it has been
            visited.
        top_op(OpWrapper): The given operator.
    Returns:
        list<OpWrapper>: The unvisited forward operators following
        ``top_op``, or None when there is none.
    """
    assert isinstance(top_op, OpWrapper)
    next_ops = [
        op for op in graph.next_ops(top_op)
        if not visited[op.idx()] and not op.is_bwd_op()
    ]
    # Callers test the result against None, so an empty list becomes None.
    return next_ops or None
def _get_accumulator(self, graph, param):
    """
    Collect the accumulators of a parameter. The accumulator was created by optimizer.

    An accumulator is any persistable output, other than the parameter
    itself, of an optimizer operator that consumes the parameter.

    Args:
        graph(GraphWrapper): The graph used to search.
        param(VarWrapper): The given parameter.
    Returns:
        list<VarWrapper>: A list of accumulators which are variables.
    """
    assert isinstance(param, VarWrapper)
    return [
        out_var
        for op in param.outputs() if op.is_opt_op()
        for out_var in op.all_outputs()
        if graph.is_persistable(out_var) and out_var.name() != param.name()
    ]
def _forward_pruning_ralated_params(self,
                                    graph,
                                    scope,
                                    param,
                                    place,
                                    ratio=None,
                                    pruned_idxs=None,
                                    lazy=False,
                                    only_graph=False,
                                    param_backup=None,
                                    param_shape_backup=None):
    """
    Prune a parameter together with every parameter its pruning affects.

    The parameter itself is pruned on axis 0, either by ``ratio`` or at the
    given ``pruned_idxs``. Then the operators found by
    ``_forward_search_related_op`` are walked in order and the parameters
    they use are pruned at the matching indexes.

    Args:
        graph(GraphWrapper): The graph to be searched.
        scope(fluid.core.Scope): The scope storing paramaters to be pruned.
        param(VarWrapper): The given parameter.
        place(fluid.Place): The device place of filter parameters.
        ratio(float): The target ratio to be pruned. None means that
            ``pruned_idxs`` is used instead.
        pruned_idxs(list): The index of elements to be pruned. Required when
            ``ratio`` is None.
        lazy(bool): True means setting the pruned elements to zero.
                    False means cutting down the pruned elements.
        only_graph(bool): True means only modifying the graph.
                          False means modifying graph and variables in scope.
        param_backup(dict): Forwarded to the pruning helpers.
        param_shape_backup(dict): Forwarded to the pruning helpers.
    """
    assert isinstance(
        graph,
        GraphWrapper), "graph must be instance of slim.core.GraphWrapper"
    assert isinstance(
        param, VarWrapper), "param must be instance of slim.core.VarWrapper"

    if param.name() in self.pruned_list[0]:
        return
    related_ops = self._forward_search_related_op(graph, param)

    def prune_with_accumulators(target, idxs, axis):
        # Prune ``target`` and its optimizer accumulators at ``idxs``.
        self._prune_parameter_by_idx(
            scope, [target] + self._get_accumulator(graph, target),
            idxs,
            pruned_axis=axis,
            place=place,
            lazy=lazy,
            only_graph=only_graph,
            param_backup=param_backup,
            param_shape_backup=param_shape_backup)

    if ratio is None:
        assert pruned_idxs is not None
        prune_with_accumulators(param, pruned_idxs, 0)
    else:
        pruned_idxs = self._prune_filters_by_ratio(
            scope, [param] + self._get_accumulator(graph, param),
            ratio,
            place,
            lazy=lazy,
            only_graph=only_graph,
            param_backup=param_backup,
            param_shape_backup=param_shape_backup)
    # Indexes as seen by the following operators; rewritten below when an
    # fc layer or a concat changes the index space.
    corrected_idxs = pruned_idxs[:]

    for position, op in enumerate(related_ops):
        op_type = op.type()
        if op_type == "conv2d":
            # A following convolution loses input channels (axis 1).
            if param not in op.all_inputs():
                for in_var in op.all_inputs():
                    if graph.is_parameter(in_var):
                        prune_with_accumulators(in_var, corrected_idxs, 1)
        elif op_type == "depthwise_conv2d":
            for in_var in op.all_inputs():
                if graph.is_parameter(in_var):
                    prune_with_accumulators(in_var, corrected_idxs, 0)
        elif op_type == "elementwise_add":
            # pruning bias
            for in_var in op.all_inputs():
                if graph.is_parameter(in_var):
                    prune_with_accumulators(in_var, pruned_idxs, 0)
        elif op_type == "mul":  # pruning fc layer
            fc_input = None
            fc_param = None
            for in_var in op.all_inputs():
                if graph.is_parameter(in_var):
                    fc_param = in_var
                else:
                    fc_input = in_var

            # Every pruned index expands to one block of
            # ``feature_map_size`` consecutive indexes of the fc weight.
            fc_idxs = []
            feature_map_size = fc_input.shape()[2] * fc_input.shape()[3]
            range_idx = np.array(range(feature_map_size))
            for i in corrected_idxs:
                fc_idxs += list(range_idx + i * feature_map_size)
            corrected_idxs = fc_idxs
            prune_with_accumulators(fc_param, corrected_idxs, 0)
        elif op_type == "concat":
            concat_inputs = op.all_inputs()
            last_op = related_ops[position - 1]
            for out_var in last_op.all_outputs():
                if out_var in concat_inputs:
                    concat_idx = concat_inputs.index(out_var)
            # Shift the indexes by the size on axis 1 of the inputs that
            # precede the pruned one in the concat.
            offset = 0
            for ci in range(concat_idx):
                offset += concat_inputs[ci].shape()[1]
            corrected_idxs = [x + offset for x in pruned_idxs]
        elif op_type == "batch_norm":
            bn_inputs = op.all_inputs()
            mean, variance = bn_inputs[2], bn_inputs[3]
            alpha, beta = bn_inputs[0], bn_inputs[1]
            for bn_var in (mean, variance, alpha, beta):
                prune_with_accumulators(bn_var, corrected_idxs, 0)
def _prune_parameters(self,
                      graph,
                      scope,
                      params,
                      ratios,
                      place,
                      lazy=False,
                      only_graph=False,
                      param_backup=None,
                      param_shape_backup=None):
    """
    Prune the given parameters and the parameters related to them.

    Every parameter is pruned through ``_forward_pruning_ralated_params``.
    For each conv2d operator consuming the parameter, the parameters of its
    brother operators (see ``_search_brother_ops``) are pruned with the same
    ratio. ``self.pruned_list`` is reset to ``[[], []]`` before pruning.

    Args:
        graph(GraphWrapper): The graph to be searched.
        scope(fluid.core.Scope): The scope storing parameters to be pruned.
        params(list<str>): A list of parameter names to be pruned.
        ratios(list<float>): The pruning ratio of each entry of ``params``;
                             both sequences must have the same length.
        place(fluid.Place): The device place of filter parameters.
        lazy(bool): True means setting the pruned elements to zero.
                    False means cutting down the pruned elements.
        only_graph(bool): True means only modifying the graph.
                          False means modifying graph and variables in scope.
        param_backup(dict): Forwarded unchanged to
                            ``_forward_pruning_ralated_params``. default: None.
        param_shape_backup(dict): Forwarded unchanged to
                            ``_forward_pruning_ralated_params``. default: None.
    """
    separator = '|----------------------------------------+----+------------------------------+------------------------------|'
    _logger.debug('\n################################')
    _logger.debug('# pruning parameters #')
    _logger.debug('################################\n')
    _logger.debug(separator)
    _logger.debug('|{:^40}|{:^4}|{:^30}|{:^30}|'.format('parameter', 'axis',
                                                      'from', 'to'))
    assert len(params) == len(ratios)

    # ``unicode`` only exists on Python 2. Resolving it once here keeps a
    # failed name check an AssertionError on Python 3 instead of a NameError.
    try:
        text_types = (str, unicode)
    except NameError:
        text_types = (str, )

    # Options shared by every forward-pruning call below.
    forward_options = dict(
        lazy=lazy,
        only_graph=only_graph,
        param_backup=param_backup,
        param_shape_backup=param_shape_backup)

    self.pruned_list = [[], []]
    for param, ratio in zip(params, ratios):
        assert isinstance(param, text_types)
        param = graph.var(param)
        self._forward_pruning_ralated_params(
            graph, scope, param, place, ratio=ratio, **forward_options)
        for op in param.outputs():
            if op.type() != 'conv2d':
                continue
            # Brother operators share the pruned dimension, so their
            # parameters are pruned with the same ratio.
            for brother in self._search_brother_ops(graph, op):
                for brother_param in graph.get_param_by_op(brother):
                    self._forward_pruning_ralated_params(
                        graph,
                        scope,
                        brother_param,
                        place,
                        ratio=ratio,
                        **forward_options)
    _logger.debug(separator)
def _search_brother_ops(self, graph, op_node):
    """
    Search brother operators that are affected by pruning of given operator.

    Starting from the non-conv2d/fc forward successors of ``op_node``, the
    graph is walked in both directions through non-conv2d/fc forward
    operators. Every conv2d/fc operator met as a predecessor during that walk
    is reported as a brother. Backward operators are ignored.

    Args:
        graph(GraphWrapper): The graph to be searched.
        op_node(OpWrapper): The start node for searching.

    Returns:
        list<OpWrapper>: The brother operators, in discovery order.
    """

    def _is_conv_or_fc(op):
        # ``type`` is a method: it has to be called before comparing,
        # otherwise the comparison with a string is always False.
        return op.type() in ('conv2d', 'fc')

    visited = {op_node.idx()}
    stack = []
    brothers = []
    for op in graph.next_ops(op_node):
        if not _is_conv_or_fc(op) and not op._is_bwd_op():
            stack.append(op)
            visited.add(op.idx())
    while stack:
        top_op = stack.pop()
        for parent in graph.pre_ops(top_op):
            if parent.idx() in visited or parent._is_bwd_op():
                continue
            if _is_conv_or_fc(parent):
                brothers.append(parent)
            else:
                stack.append(parent)
            visited.add(parent.idx())
        for child in graph.next_ops(top_op):
            if (not _is_conv_or_fc(child) and child.idx() not in visited and
                    not child._is_bwd_op()):
                stack.append(child)
                visited.add(child.idx())
    return brothers
def _prune_graph(self, graph, target_graph):
    """
    Resize the parameters of ``graph`` to the shapes used in ``target_graph``.

    For every parameter of ``target_graph``, the variable with the same name
    in ``graph`` gets the parameter's shape. The old and new shapes are
    written to the debug log as a table.

    Args:
        graph(GraphWrapper): The graph to be pruned.
        target_graph(GraphWrapper): The reference graph.

    Return: None
    """
    border = '|----+----------------------------------------+------------------------------+------------------------------|'
    row_format = '|{:^4}|{:^40}|{:^30}|{:^30}|'

    _logger.debug(border)
    _logger.debug(row_format.format('id', 'parammeter', 'from', 'to'))
    for row_id, target_param in enumerate(target_graph.all_parameters(), 1):
        var = graph.var(target_param.name())
        ori_shape = var.shape()
        var.set_shape(target_param.shape())
        _logger.debug(border)
        _logger.debug(
            row_format.format(
                str(row_id),
                str(target_param.name()),
                str(ori_shape), str(target_param.shape())))
    _logger.debug(border)
class UniformPruneStrategy(PruneStrategy):
    """
    The uniform pruning strategy. The parameters will be pruned by uniform ratio.
    """

    def __init__(self,
                 pruner=None,
                 start_epoch=0,
                 end_epoch=0,
                 target_ratio=0.5,
                 metric_name=None,
                 pruned_params='conv.*_weights'):
        """
        Args:
            pruner(slim.Pruner): The pruner used to prune the parameters.
            start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
            target_ratio(float): The flops ratio to be pruned from current model.
            metric_name(str): The metric used to evaluate the model.
                         It should be one of keys in out_nodes of graph wrapper.
            pruned_params(str): The pattern str to match the parameter names to be pruned.
        """
        super(UniformPruneStrategy, self).__init__(
            pruner, start_epoch, end_epoch, target_ratio, metric_name,
            pruned_params)

    def _get_best_ratios(self, context):
        """
        Binary-search one ratio, shared by all matched parameters, whose
        pruned flops are within 0.01 of ``self.target_ratio``.

        Only the shapes in the eval graph are modified while searching and
        they are restored after every trial.

        Returns:
            tuple: (names of the matched parameters, list of ratios).
        """
        _logger.info('_get_best_ratios')
        eval_graph = context.eval_graph
        pruned_params = [
            param.name() for param in eval_graph.all_parameters()
            if re.match(self.pruned_params, param.name())
        ]

        lower, upper = 0., 1.
        base_flops = eval_graph.flops()
        base_size = eval_graph.numel_params()
        while lower < upper:
            ratio = (upper + lower) / 2
            _logger.debug(
                '-----------Try pruning ratio: {:.2f}-----------'.format(ratio))
            ratios = [ratio] * len(pruned_params)
            shape_backup = {}
            self._prune_parameters(
                eval_graph,
                context.scope,
                pruned_params,
                ratios,
                context.place,
                only_graph=True,
                param_shape_backup=shape_backup)

            pruned_flops = 1 - (float(eval_graph.flops()) / base_flops)
            pruned_size = 1 - (float(eval_graph.numel_params()) / base_size)
            _logger.debug('Pruned flops: {:.2f}'.format(pruned_flops))
            _logger.debug('Pruned model size: {:.2f}'.format(pruned_size))

            # Undo the trial so the next one starts from the original shapes.
            for name in shape_backup.keys():
                eval_graph.var(name).set_shape(shape_backup[name])

            if abs(pruned_flops - self.target_ratio) < 1e-2:
                break
            if pruned_flops > self.target_ratio:
                upper = ratio
            else:
                lower = ratio

        _logger.info('Get ratios: {}'.format([round(r, 2) for r in ratios]))
        return pruned_params, ratios

    def restore_from_checkpoint(self, context):
        """Prune again with the parameters and ratios stored on this strategy."""
        self._prune(context, self.params, self.ratios)

    def _prune(self, context, params, ratios):
        """
        Prune the optimize graph, then resize the eval graph to match it and
        log the pruned size and flops of the eval graph.
        """
        optimize_graph = context.optimize_graph
        eval_graph = context.eval_graph

        self._prune_parameters(optimize_graph, context.scope, params, ratios,
                               context.place)

        # Reference values, taken before the eval graph itself is resized.
        model_size = eval_graph.numel_params()
        flops = eval_graph.flops()

        _logger.debug('\n################################')
        _logger.debug('# pruning eval graph #')
        _logger.debug('################################\n')
        self._prune_graph(eval_graph, optimize_graph)
        optimize_graph.update_groups_of_conv()
        eval_graph.update_groups_of_conv()

        _logger.info(
            '------------------finish pruning--------------------------------')
        pruned_size = 1 - (float(eval_graph.numel_params()) / model_size)
        _logger.info('Pruned size: {:.2f}'.format(pruned_size))
        pruned_flops = 1 - (float(eval_graph.flops()) / flops)
        _logger.info('Pruned flops: {:.2f}'.format(pruned_flops))

    def on_epoch_begin(self, context):
        """Search the ratios and prune when the start epoch is reached."""
        if context.epoch_id != self.start_epoch:
            return
        self.params, self.ratios = self._get_best_ratios(context)
        self._prune(context, self.params, self.ratios)
        _logger.info(
            '------------------UniformPruneStrategy.on_compression_begin finish--------------------------------'
        )
class SensitivePruneStrategy(PruneStrategy):
    """
    Sensitive pruning strategy. Different pruned ratio was applied on each layer.
    """

    def __init__(self,
                 pruner=None,
                 start_epoch=0,
                 end_epoch=0,
                 delta_rate=0.20,
                 target_ratio=0.5,
                 metric_name='top1_acc',
                 pruned_params='conv.*_weights',
                 sensitivities_file='./sensitivities.data',
                 sensitivities=None,
                 num_steps=1,
                 eval_rate=None):
        """
        Args:
            pruner(slim.Pruner): The pruner used to prune the parameters.
            start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0.
            end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0.
            delta_rate(float): The delta used to generate ratios when calculating sensitivities. default: 0.2
            target_ratio(float): The flops ratio to be pruned from current model. default: 0.5
            metric_name(str): The metric used to evaluate the model.
                         It should be one of keys in out_nodes of graph wrapper. default: 'top1_acc'
            pruned_params(str): The pattern str to match the parameter names to be pruned. default: 'conv.*_weights'.
            sensitivities_file(str): The sensitivities file. default: './sensitivities.data'
            sensitivities(dict): The user-defined sensitivities.
                         None means an empty dict. default: None.
            num_steps(int): The number of pruning steps. default: 1.
            eval_rate(float): The rate of sampled data used to calculate sensitivities.
                         None means using all the data. default: None.
        """
        super(SensitivePruneStrategy, self).__init__(
            pruner, start_epoch, end_epoch, target_ratio, metric_name,
            pruned_params)
        self.delta_rate = delta_rate
        self.pruned_list = []
        # A fresh dict per instance: a ``{}`` default argument would be
        # shared by every strategy created without this argument.
        self.sensitivities = {} if sensitivities is None else sensitivities
        self.sensitivities_file = sensitivities_file
        self.num_steps = num_steps
        self.eval_rate = eval_rate
        # Per-step ratio such that ``num_steps`` successive steps reach
        # ``target_ratio`` in total.
        self.pruning_step = 1 - pow((1 - target_ratio), 1.0 / self.num_steps)

    def _save_sensitivities(self, sensitivities, sensitivities_file):
        """
        Save sensitivities into file.
        """
        with open(sensitivities_file, 'wb') as f:
            pickle.dump(sensitivities, f)

    def _load_sensitivities(self, sensitivities_file):
        """
        Load sensitivities from file.

        Returns an empty dict when the file name is empty or the file does
        not exist. The loaded 'pruned_percent' values are rounded to two
        decimals, and the table is written to the debug log.
        """
        sensitivities = {}
        if sensitivities_file and os.path.exists(sensitivities_file):
            # NOTE: unpickling can execute arbitrary code; only load
            # sensitivities files that come from a trusted source.
            with open(sensitivities_file, 'rb') as f:
                if sys.version_info < (3, 0):
                    sensitivities = pickle.load(f)
                else:
                    sensitivities = pickle.load(f, encoding='bytes')

        for param in sensitivities:
            sensitivities[param]['pruned_percent'] = [
                round(p, 2) for p in sensitivities[param]['pruned_percent']
            ]
        self._format_sensitivities(sensitivities)
        return sensitivities

    def _format_sensitivities(self, sensitivities):
        """
        Print formatted sensitivities in debug log level.

        Only parameters with one loss per ratio column are listed.
        """
        tb = pt.PrettyTable()
        tb.field_names = ["parameter", "size"] + [
            str(round(i, 2))
            for i in np.arange(self.delta_rate, 1, self.delta_rate)
        ]
        for param in sensitivities:
            if len(sensitivities[param]['loss']) == (len(tb.field_names) - 2):
                tb.add_row([param, sensitivities[param]['size']] + [
                    round(loss, 2) for loss in sensitivities[param]['loss']
                ])
        _logger.debug('\n################################')
        _logger.debug('# sensitivities table #')
        _logger.debug('################################\n')
        _logger.debug(tb)

    def _compute_sensitivities(self, context):
        """
        Computing the sensitivities of all parameters.

        Each matched parameter is lazily pruned (elements set to zero) with
        the ratios delta_rate, 2 * delta_rate, ... below 1. The metric drop
        is recorded for every matched name found in ``self.pruned_list[0]``
        after pruning. Results are saved to the sensitivities file after
        every trial, and ratios already present in the file are skipped.

        Returns:
            dict: parameter name -> {'pruned_percent', 'loss', 'size'}.
        """
        _logger.info("calling _compute_sensitivities.")
        cached_id = np.random.randint(1000)
        if self.start_epoch == context.epoch_id:
            sensitivities_file = self.sensitivities_file
        else:
            sensitivities_file = self.sensitivities_file + ".epoch" + str(
                context.epoch_id)
        sensitivities = self._load_sensitivities(sensitivities_file)

        for param in context.eval_graph.all_parameters():
            if not re.match(self.pruned_params, param.name()):
                continue
            if param.name() not in sensitivities:
                sensitivities[param.name()] = {
                    'pruned_percent': [],
                    'loss': [],
                    'size': param.shape()[0]
                }

        metric = None
        # Iterate over a snapshot: entries may be added to ``sensitivities``
        # inside the loop, which raises RuntimeError on Python 3 when
        # iterating the live key view.
        for param in list(sensitivities.keys()):
            ratio = self.delta_rate
            while ratio < 1:
                ratio = round(ratio, 2)
                if ratio in sensitivities[param]['pruned_percent']:
                    _logger.debug('{}, {} has computed.'.format(param, ratio))
                    ratio += self.delta_rate
                    continue
                # The baseline metric is evaluated lazily, only when at
                # least one ratio still has to be computed.
                if metric is None:
                    metric = self._eval_graph(context, self.eval_rate,
                                              cached_id)

                param_backup = {}
                # prune parameter by ratio
                self._prune_parameters(
                    context.eval_graph,
                    context.scope, [param], [ratio],
                    context.place,
                    lazy=True,
                    param_backup=param_backup)
                # get accuracy after pruning and update sensitivities
                pruned_metric = self._eval_graph(context, self.eval_rate,
                                                 cached_id)
                loss = metric - pruned_metric
                _logger.info("pruned param: {}; {}; loss={}".format(
                    param, ratio, loss))
                for brother in self.pruned_list[0]:
                    if re.match(self.pruned_params, brother):
                        if brother not in sensitivities:
                            sensitivities[brother] = {
                                'pruned_percent': [],
                                'loss': []
                            }
                        sensitivities[brother]['pruned_percent'].append(ratio)
                        sensitivities[brother]['loss'].append(loss)

                self._save_sensitivities(sensitivities, sensitivities_file)

                # restore pruned parameters
                for param_name in param_backup.keys():
                    param_t = context.scope.find_var(param_name).get_tensor()
                    param_t.set(param_backup[param_name], context.place)

                ratio += self.delta_rate
        return sensitivities

    def _get_best_ratios(self, context, sensitivities, target_ratio):
        """
        Search a group of ratios for pruning target flops.

        A cubic curve loss = f(ratio) is fitted per parameter. The accepted
        loss is then binary-searched, and each parameter's ratio is read from
        its curve, until the pruned flops of the eval graph are within 0.015
        of ``target_ratio``.

        Returns:
            tuple: (parameter names of ``sensitivities``, list of ratios).
        """
        _logger.info('_get_best_ratios for pruning ratie: {}'.format(
            target_ratio))

        def func(params, x):
            a, b, c, d = params
            return a * x * x * x + b * x * x + c * x + d

        def error(params, x, y):
            return func(params, x) - y

        def slove_coefficient(x, y):
            init_coefficient = [10, 10, 10, 10]
            coefficient, loss = leastsq(error, init_coefficient, args=(x, y))
            return coefficient

        min_loss = 0.
        max_loss = 0.

        # step 1: fit curve by sensitivities
        coefficients = {}
        for param in sensitivities:
            # Five extra (0, 0) samples are prepended to the measured points.
            losses = np.array([0] * 5 + sensitivities[param]['loss'])
            precents = np.array([0] * 5 + sensitivities[param][
                'pruned_percent'])
            coefficients[param] = slove_coefficient(precents, losses)
            loss = np.max(losses)
            max_loss = np.max([max_loss, loss])

        # step 2: Find a group of ratios by binary searching.
        flops = context.eval_graph.flops()
        model_size = context.eval_graph.numel_params()
        ratios = []
        while min_loss < max_loss:
            loss = (max_loss + min_loss) / 2
            _logger.info(
                '-----------Try pruned ratios while acc loss={:.4f}-----------'.
                format(loss))
            ratios = []
            # step 2.1: Get ratios according to current loss
            for param in sensitivities:
                coefficient = copy.deepcopy(coefficients[param])
                coefficient[-1] = coefficient[-1] - loss
                roots = np.roots(coefficient)
                # NOTE(review): ``min_root`` is reset on every iteration, so
                # this keeps the LAST real root inside (0, 1), not the
                # smallest one. When no root qualifies, the ratio selected
                # for the previous parameter is reused (UnboundLocalError for
                # the very first one). Left unchanged because the fallback
                # ratio is a policy decision - confirm the intended behavior.
                for root in roots:
                    min_root = 1
                    if np.isreal(root) and root > 0 and root < 1:
                        selected_root = min(root.real, min_root)
                ratios.append(selected_root)
            _logger.info('Pruned ratios={}'.format(
                [round(ratio, 3) for ratio in ratios]))

            # step 2.2: Pruning by current ratios
            param_shape_backup = {}
            self._prune_parameters(
                context.eval_graph,
                context.scope,
                sensitivities.keys(),
                ratios,
                context.place,
                only_graph=True,
                param_shape_backup=param_shape_backup)

            pruned_flops = 1 - (float(context.eval_graph.flops()) / flops)
            pruned_size = 1 - (float(context.eval_graph.numel_params()) /
                               model_size)
            _logger.info('Pruned flops: {:.4f}'.format(pruned_flops))
            _logger.info('Pruned model size: {:.4f}'.format(pruned_size))
            # Only shapes were changed; put them back before the next trial.
            for param in param_shape_backup.keys():
                context.eval_graph.var(param).set_shape(param_shape_backup[
                    param])

            # step 2.3: Check whether current ratios is enough
            if abs(pruned_flops - target_ratio) < 0.015:
                break
            if pruned_flops > target_ratio:
                max_loss = loss
            else:
                min_loss = loss
        return sensitivities.keys(), ratios

    def _current_pruning_target(self, context):
        '''
        Get the target pruning rate in current epoch.

        Returns ``self.pruning_step`` and consumes one pruning step when the
        current epoch is the start epoch or
        ``context.eval_converged(self.metric_name, 0.005)`` is true.
        Returns None otherwise, or when no pruning step is left.
        '''
        _logger.info('Left number of pruning steps: {}'.format(self.num_steps))
        if self.num_steps <= 0:
            return None
        if (self.start_epoch == context.epoch_id) or context.eval_converged(
                self.metric_name, 0.005):
            self.num_steps -= 1
            return self.pruning_step

    def on_epoch_begin(self, context):
        """
        Run one pruning step when ``_current_pruning_target`` returns a ratio.

        The step computes sensitivities, searches the ratios, prunes the
        optimize graph, resizes the eval graph to match, recompiles both
        graphs and logs the result.
        """
        current_ratio = self._current_pruning_target(context)
        if current_ratio is not None:
            sensitivities = self._compute_sensitivities(context)
            params, ratios = self._get_best_ratios(context, sensitivities,
                                                   current_ratio)
            self._prune_parameters(context.optimize_graph, context.scope,
                                   params, ratios, context.place)

            # Reference values, taken before the eval graph is resized.
            model_size = context.eval_graph.numel_params()
            flops = context.eval_graph.flops()
            _logger.debug('################################')
            _logger.debug('# pruning eval graph #')
            _logger.debug('################################')
            self._prune_graph(context.eval_graph, context.optimize_graph)
            context.optimize_graph.update_groups_of_conv()
            context.eval_graph.update_groups_of_conv()
            context.optimize_graph.compile()  # to update the compiled program
            context.eval_graph.compile(
                for_parallel=False,
                for_test=True)  # to update the compiled program
            _logger.info(
                '------------------finish pruning--------------------------------'
            )
            _logger.info('Pruned size: {:.3f}'.format(1 - (float(
                context.eval_graph.numel_params()) / model_size)))
            _logger.info('Pruned flops: {:.3f}'.format(1 - (float(
                context.eval_graph.flops()) / flops)))
            metric = self._eval_graph(context)
            _logger.info('Metric after pruning: {:.2f}'.format(metric))
            _logger.info(
                '------------------SensitivePruneStrategy.on_epoch_begin finish--------------------------------'
            )
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import collections
from .... import layers
# Public names exported by a star-import of this module.
__all__ = ['Pruner', 'StructurePruner']
class Pruner(object):
    """
    Base class of all pruners.
    """

    def __init__(self):
        pass

    def prune(self, param):
        """Prune ``param``. The base implementation does nothing and returns None."""
        pass


class StructurePruner(Pruner):
    """
    Pruner used to pruning parameters by groups.
    """

    def __init__(self, pruning_axis, criterions):
        """
        Args:
            pruning_axis(dict): The key is the name of parameter to be pruned,
                                '*' means all the parameters.
                                The value is the axis to be used. Given a parameter
                                with shape [3, 4], the result of pruning 50% on axis 1
                                is a parameter with shape [3, 2].
            criterions(dict): The key is the name of parameter to be pruned,
                              '*' means all the parameters.
                              The value is the criterion used to sort groups for pruning.
                              It only supports 'l1_norm' currently.
        """
        super(StructurePruner, self).__init__()
        self.pruning_axis = pruning_axis
        self.criterions = criterions

    def cal_pruned_idx(self, name, param, ratio, axis=None):
        """
        Calculate the index to be pruned on axis by given pruning ratio.

        The groups along ``axis`` are ranked by the sum of their absolute
        values, and the ``round(size * ratio)`` smallest ones are selected.

        Args:
            name(str): The name of parameter to be pruned.
            param(np.array): The data of parameter to be pruned.
            ratio(float): The ratio to be pruned.
            axis(int): The axis to be used for pruning given parameter.
                       If it is None, the value in self.pruning_axis will be used.
                       A negative value counts from the last axis.
                       default: None.

        Returns:
            np.array: The indexes to be pruned on axis, smallest norm first.

        Raises:
            ValueError: If the criterion configured for ``name`` is not 'l1_norm'.
        """
        criterion = self.criterions[
            name] if name in self.criterions else self.criterions['*']
        if axis is None:
            assert self.pruning_axis is not None, "pruning_axis should set if axis is None."
            axis = self.pruning_axis[
                name] if name in self.pruning_axis else self.pruning_axis['*']
        if criterion != 'l1_norm':
            raise ValueError(
                "Unsupported pruning criterion '{}' for parameter '{}'; "
                "only 'l1_norm' is supported.".format(criterion, name))
        # Normalize a negative axis; otherwise it never equals a positive
        # dimension index below and every dimension would be reduced.
        if axis < 0:
            axis += len(param.shape)
        prune_num = int(round(param.shape[axis] * ratio))
        reduce_dims = tuple(i for i in range(len(param.shape)) if i != axis)
        scores = np.sum(np.abs(param), axis=reduce_dims)
        return scores.argsort()[:prune_num]

    def prune_tensor(self, tensor, pruned_idx, pruned_axis, lazy=False):
        """
        Pruning a array by indexes on given axis.

        The input array is never modified; a new array is returned.

        Args:
            tensor(numpy.array): The target array to be pruned.
            pruned_idx(list<int>): The indexes to be pruned.
            pruned_axis(int): The axis of given array to be pruned on.
            lazy(bool): True means setting the pruned elements to zero.
                        False means remove the pruned elements from memory.
                        default: False.

        Returns:
            numpy.array: The pruned array.

        Raises:
            IndexError: If an index in ``pruned_idx`` is out of range.
        """
        mask = np.zeros(tensor.shape[pruned_axis], dtype=bool)
        mask[pruned_idx] = True
        if lazy:
            # Work on a copy so the caller's array keeps its values.
            pruned = np.array(tensor)
            selector = [slice(None)] * pruned.ndim
            selector[pruned_axis] = mask
            pruned[tuple(selector)] = 0
            return pruned
        return np.compress(~mask, tensor, axis=pruned_axis)
......@@ -16,10 +16,6 @@ from __future__ import print_function
from . import quantization_pass
from .quantization_pass import *
from . import quantization_strategy
from .quantization_strategy import *
from . import mkldnn_post_training_strategy
from .mkldnn_post_training_strategy import *
from . import quant_int8_mkldnn_pass
from .quant_int8_mkldnn_pass import *
from . import quant2_int8_mkldnn_pass
......@@ -29,8 +25,7 @@ from .post_training_quantization import *
from . import imperative
from .imperative import *
__all__ = quantization_pass.__all__ + quantization_strategy.__all__
__all__ += mkldnn_post_training_strategy.__all__
__all__ = quantization_pass.__all__
__all__ += quant_int8_mkldnn_pass.__all__
__all__ += quant2_int8_mkldnn_pass.__all__
__all__ += post_training_quantization.__all__
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import logging
import six
import numpy as np
from .... import core
from ..core.strategy import Strategy
from ....log_helper import get_logger
# Public names exported by a star-import of this module.
__all__ = ['MKLDNNPostTrainingQuantStrategy']
# Module-level logger, created with the INFO level constant and a
# "<time>-<level>: <message>" record format.
_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class MKLDNNPostTrainingQuantStrategy(Strategy):
    """
    The strategy for MKL-DNN Post Training quantization strategy.
    """

    def __init__(self,
                 int8_model_save_path=None,
                 fp32_model_path=None,
                 cpu_math_library_num_threads=1):
        """
        Args:
            int8_model_save_path(str): int8_model_save_path is used to save an int8 ProgramDesc
                         with fp32 weights which is used for MKL-DNN int8 inference. For post training quantization,
                         MKLDNNPostTrainingQuantStrategy only supports converting a fp32 ProgramDesc
                         with fp32 weights to an int8 ProgramDesc with fp32 weights now. The saved
                         int8 ProgramDesc with fp32 weights only can be executed with MKL-DNN enabled.
                         None means it doesn't save int8 ProgramDesc with fp32 weights. default: None.
            fp32_model_path(str): fp32_model_path is used to load an original fp32 ProgramDesc with fp32 weights.
                         It is required: an Exception is raised when it is None. default: None.
            cpu_math_library_num_threads(int): The number of cpu math library threads which is used on
                         MKLDNNPostTrainingQuantStrategy. 1 means it only uses one cpu math library
                         thread. default: 1

        Raises:
            Exception: If ``fp32_model_path`` is None.
        """
        super(MKLDNNPostTrainingQuantStrategy, self).__init__(0, 0)
        self.int8_model_save_path = int8_model_save_path
        if fp32_model_path is None:
            raise Exception("fp32_model_path is None")
        self.fp32_model_path = fp32_model_path
        self.cpu_math_library_num_threads = cpu_math_library_num_threads

    def on_compression_begin(self, context):
        """
        Prepare the data and quantify the model

        The first batch of ``context.eval_reader()`` is used as warm-up data
        for the quantizer. The optimized model is saved when
        ``int8_model_save_path`` is set.
        """
        super(MKLDNNPostTrainingQuantStrategy,
              self).on_compression_begin(context)
        _logger.info('InferQuantStrategy::on_compression_begin')

        # Prepare the Analysis Config
        infer_config = core.AnalysisConfig("AnalysisConfig")
        infer_config.switch_ir_optim(True)
        infer_config.disable_gpu()
        infer_config.set_model(self.fp32_model_path)
        infer_config.enable_mkldnn()
        infer_config.set_cpu_math_library_num_threads(
            self.cpu_math_library_num_threads)

        # Prepare the data for calculating the quantization scales
        warmup_reader = context.eval_reader()
        # The builtin works with both the Python 2 and Python 3 iterator
        # protocols.
        data = next(warmup_reader)

        num_images = len(data)
        pixels = [img.tolist() for (img, _) in data]
        pixels = np.array(pixels).astype("float32")
        pixels = pixels.reshape([num_images, ] + list(data[0][0].shape))
        images = core.PaddleTensor(pixels.ravel(), "x")
        images.shape = [num_images, ] + list(data[0][0].shape)

        label_data = [label for (_, label) in data]
        label_array = np.array(label_data).astype("int64")
        labels = core.PaddleTensor(label_array.reshape([num_images, 1]), "y")
        warmup_data = [images, labels]

        # Enable the INT8 Quantization
        infer_config.enable_quantizer()
        infer_config.quantizer_config().set_quant_data(warmup_data)
        infer_config.quantizer_config().set_quant_batch_size(num_images)

        # Run INT8 MKL-DNN Quantization
        predictor = core.create_paddle_predictor(infer_config)
        save_path = self.int8_model_save_path
        if save_path:
            if not os.path.exists(save_path):
                os.makedirs(save_path)
            predictor.SaveOptimModel(save_path)

        _logger.info(
            'Finish MKLDNNPostTrainingQuantStrategy::on_compresseion_begin')
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import sys
import numpy as np
from .... import Executor
from .... import io
from .... import core, scope_guard
from ....compiler import CompiledProgram
from ....compiler import BuildStrategy
from ....framework import IrGraph, Variable, Program
from ....log_helper import get_logger
from ..core.strategy import Strategy
from .quantization_pass import *
# Public names exported by a star-import of this module.
__all__ = ['QuantizationStrategy']
# Module-level logger, created with the INFO level constant and a
# "<time>-<level>: <message>" record format.
_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class QuantizationStrategy(Strategy):
"""
The strategy for Quantization.
"""
def __init__(self,
             start_epoch=0,
             end_epoch=0,
             float_model_save_path=None,
             mobile_model_save_path=None,
             int8_model_save_path=None,
             activation_bits=8,
             weight_bits=8,
             activation_quantize_type='abs_max',
             weight_quantize_type='abs_max',
             save_in_nodes=None,
             save_out_nodes=None):
    """
    Store the quantization settings of the strategy.

    Args:
        start_epoch(int): The 'on_epoch_begin' function will be called in start_epoch. default: 0
        end_epoch(int): The 'on_epoch_end' function will be called in end_epoch. default: 0
        float_model_save_path(str): The path to save model with float weights.
                    None means it doesn't save float model. default: None.
        mobile_model_save_path(str): The path to save model for paddle-mobile execution.
                    None means it doesn't save mobile model. default: None.
        int8_model_save_path(str): The path to save model with int8_t weight.
                    None means it doesn't save int8 model. default: None.
        activation_bits(int): quantization bit number for activation. default: 8.
        weight_bits(int): quantization bit number for weights. The bias is not quantized.
                    default: 8.
        activation_quantize_type(str): quantization type for activation,
            now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
            If use 'abs_max' mode, the quantization scale will be calculated
            dynamically each step in both training and testing period. If use
            'range_abs_max', a static quantization scale will be calculated
            during training and used in inference.
        weight_quantize_type (str): quantization type for weights, support 'abs_max' and 'channel_wise_abs_max'.
            The 'range_abs_max' usually is not used for weight, since weights are fixed once the model is well trained.
        save_in_nodes(list<str>): A list of variable names used to prune graph
                                  for saving inference model.
        save_out_nodes(list<str>): A list of variable names used to prune graph
                                  for saving inference model.
    """
    super(QuantizationStrategy, self).__init__(start_epoch, end_epoch)

    # Schedule.
    self.start_epoch = start_epoch
    self.end_epoch = end_epoch

    # Where the resulting models are saved.
    self.float_model_save_path = float_model_save_path
    self.mobile_model_save_path = mobile_model_save_path
    self.int8_model_save_path = int8_model_save_path

    # Quantization configuration.
    self.activation_bits = activation_bits
    self.weight_bits = weight_bits
    self.activation_quantize_type = activation_quantize_type
    self.weight_quantize_type = weight_quantize_type

    # Variables delimiting the saved inference model.
    self.save_out_nodes = save_out_nodes
    self.save_in_nodes = save_in_nodes
def restore_from_checkpoint(self, context):
    """
    Restore graph when the compression task is inited from checkpoint.

    The graphs are only modified when the restored epoch is not 0 and lies
    after the start epoch, i.e. the start epoch has been missed.
    """
    epoch_id = context.epoch_id
    if epoch_id == 0 or not (epoch_id > self.start_epoch):
        return
    # It is inited from checkpoint and has missed start epoch.
    _logger.info("Restore quantization task from checkpoint")
    self._modify_graph_for_quantization(context)
    _logger.info("Finish restoring quantization task from checkpoint")
def _modify_graph_for_quantization(self, context):
    """
    Insert fake_quantize_op and fake_dequantize_op before training and testing.

    The transform pass is applied to clones of the optimize and eval
    programs. Afterwards the compiled training graph and the eval program in
    ``context`` are replaced, and the transformed test graph is stored in
    ``context`` under 'quantization_test_ir_graph_backup'.
    """
    optimize_graph = context.optimize_graph

    train_ir_graph = IrGraph(
        core.Graph(optimize_graph.program.clone().desc), for_test=False)
    test_ir_graph = IrGraph(
        core.Graph(context.eval_graph.program.clone().desc), for_test=True)

    transform_pass = QuantizationTransformPass(
        scope=context.scope,
        place=context.place,
        weight_bits=self.weight_bits,
        activation_bits=self.activation_bits,
        activation_quantize_type=self.activation_quantize_type,
        weight_quantize_type=self.weight_quantize_type)
    for ir_graph in (train_ir_graph, test_ir_graph):
        transform_pass.apply(ir_graph)

    # Put persistables created by transform_pass into
    # context.optimize_graph.persistables for saving checkpoint.
    known_persistables = {
        var.name
        for var in optimize_graph.program.list_vars() if var.persistable
    }
    holder_program = Program()
    for var_node in train_ir_graph.all_persistable_nodes():
        if var_node.name() in known_persistables:
            continue
        var_desc = var_node.var()
        new_var = holder_program.global_block().create_var(
            name=var_node.name(),
            shape=var_desc.shape(),
            dtype=var_desc.dtype(),
            type=var_desc.type(),
            lod_level=var_desc.lod_level())
        optimize_graph.persistables[new_var.name] = new_var

    build_strategy = BuildStrategy()
    build_strategy.enable_inplace = False
    build_strategy.memory_optimize = False
    build_strategy.fuse_all_reduce_ops = False

    # for quantization training
    compiled_program = CompiledProgram(train_ir_graph.graph)
    optimize_graph.compiled_graph = compiled_program.with_data_parallel(
        loss_name=optimize_graph.out_nodes['loss'],
        build_strategy=build_strategy)

    context.eval_graph.program = test_ir_graph.to_program()

    # for saving inference model after training
    context.put('quantization_test_ir_graph_backup', test_ir_graph)
def on_epoch_begin(self, context):
    """
    Insert fake_quantize_op and fake_dequantize_op before training and testing.

    The graphs are only modified in the start epoch; the parent hook is
    called in every epoch.
    """
    super(QuantizationStrategy, self).on_epoch_begin(context)
    if self.start_epoch != context.epoch_id:
        return
    _logger.info('QuantizationStrategy::on_epoch_begin')
    self._modify_graph_for_quantization(context)
    _logger.info('Finish QuantizationStrategy::on_epoch_begin')
def on_epoch_end(self, context):
    """
    Freeze the quantized test graph and save the inference models.

    The parent hook always runs. In the epoch whose id equals
    ``self.end_epoch`` the backed-up test IR graph is frozen, installed as
    ``context.eval_graph.program`` and then saved to every configured path
    (float, int8, mobile), in that order. The int8 and mobile passes mutate
    the same IR graph in place, so the order of the three saves matters.

    Args:
        context: The compression context. This method reads ``scope``,
            ``place``, ``epoch_id`` and ``eval_graph``, and the
            'quantization_test_ir_graph_backup' entry via ``context.get``.
    """
    # Chain to the parent's matching hook. The original called
    # on_compression_end() here, which fired the end-of-compression hook
    # once per epoch and never ran the parent's epoch-end hook.
    super(QuantizationStrategy, self).on_epoch_end(context)
    if context.epoch_id != self.end_epoch:
        return

    _logger.info('QuantizationStrategy::on_epoch_end')
    test_ir_graph = context.get('quantization_test_ir_graph_backup')
    # freeze the graph after training
    freeze_pass = QuantizationFreezePass(
        scope=context.scope,
        place=context.place,
        weight_bits=self.weight_bits,
        activation_bits=self.activation_bits,
        weight_quantize_type=self.weight_quantize_type)
    freeze_pass.apply(test_ir_graph)
    # for other strategies
    context.eval_graph.program = test_ir_graph.to_program()

    # Output variables: the eval graph's own out nodes unless overridden.
    if self.save_out_nodes is None:
        out_names = context.eval_graph.out_nodes.values()
    else:
        out_names = self.save_out_nodes
    out_vars = [
        context.eval_graph.var(var_name)._var for var_name in out_names
    ]
    # Input names: the eval graph's own in nodes unless overridden.
    if self.save_in_nodes is None:
        in_vars = list(context.eval_graph.in_nodes.values())
    else:
        in_vars = self.save_in_nodes

    def save_model(dirname):
        # Serialize the *current* state of test_ir_graph into `dirname`.
        executor = Executor(context.place)
        with scope_guard(context.scope):
            io.save_inference_model(
                dirname,
                in_vars,
                out_vars,
                executor,
                main_program=test_ir_graph.to_program(),
                model_filename='model',
                params_filename='weights',
                export_for_deployment=True)

    # save float model
    if self.float_model_save_path:
        save_model(self.float_model_save_path)

    # save int8 model
    if self.int8_model_save_path:
        convert_int8_pass = ConvertToInt8Pass(
            scope=context.scope, place=context.place)
        convert_int8_pass.apply(test_ir_graph)
        save_model(self.int8_model_save_path)

    # save mobile model
    if self.mobile_model_save_path:
        if not self.int8_model_save_path:
            # The weights were not converted by the int8 step above, so
            # convert them to int8_t here.
            convert_int8_pass = ConvertToInt8Pass(
                scope=context.scope, place=context.place)
            convert_int8_pass.apply(test_ir_graph)
        # make some changes on the graph for the mobile inference
        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_ir_graph)
        save_model(self.mobile_model_save_path)

    _logger.info('Finish QuantizationStrategy::on_epoch_end')
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import controller
from .controller import *
__all__ = controller.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The controller used to search hyperparameters or neural architecture"""
import numpy as np
import copy
import math
import logging
from ....log_helper import get_logger
# Names exported by `from .controller import *`.
__all__ = ['EvolutionaryController', 'SAController']
# Module-level logger at INFO level; the controllers below use it to report
# search progress.
_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class EvolutionaryController(object):
    """Interface shared by all evolutionary search controllers.

    Every method except the constructor raises NotImplementedError and
    must be overridden by a concrete controller.
    """

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; the base class keeps no state."""

    def update(self, tokens, reward):
        """Feed back the reward obtained by a set of tokens.

        Args:
            tokens(list<int>): A solution of the searching task.
            reward: The reward earned by `tokens`.
        """
        raise NotImplementedError('Abstract method.')

    def reset(self, range_table, constrain_func=None):
        """Re-initialize the controller for a new search.

        Args:
            range_table(list<int>): Defines the search space; every
                generated tokens[i] should lie in [0, range_table[i]).
            constrain_func(function): Predicate telling whether tokens
                satisfy the constraint. None means no constraint.
                Default: None.
        """
        raise NotImplementedError('Abstract method.')

    def next_tokens(self):
        """Produce the next candidate tokens."""
        raise NotImplementedError('Abstract method.')
class SAController(EvolutionaryController):
    """Simulated annealing controller."""

    def __init__(self,
                 range_table=None,
                 reduce_rate=0.85,
                 init_temperature=1024,
                 max_iter_number=300):
        """Initialize.

        Args:
            range_table(list<int>): Range table.
            reduce_rate(float): The decay rate of temperature.
            init_temperature(float): Init temperature.
            max_iter_number(int): Max number of attempts made by
                next_tokens() to find tokens satisfying the constraint.
        """
        super(SAController, self).__init__()
        self._range_table = range_table
        self._reduce_rate = reduce_rate
        self._init_temperature = init_temperature
        self._max_iter_number = max_iter_number
        # Set here (not only in reset()) so next_tokens() can be called on
        # a controller that has not been reset yet.
        self._constrain_func = None
        self._reward = -1
        self._tokens = None
        self._max_reward = -1
        self._best_tokens = None
        self._iter = 0

    def __getstate__(self):
        """Return the picklable state; the constraint callback is left out."""
        return {
            key: value
            for key, value in self.__dict__.items()
            if key != "_constrain_func"
        }

    def __setstate__(self, state):
        """Restore pickled state.

        __getstate__ drops `_constrain_func`, so default it to None ("no
        constraint") here; otherwise next_tokens() would fail with
        AttributeError on an unpickled controller until reset() is called.
        """
        self.__dict__.update(state)
        self.__dict__.setdefault("_constrain_func", None)

    def reset(self, range_table, init_tokens, constrain_func=None):
        """
        Reset the status of current controller.

        Note that, unlike the base class, this takes `init_tokens` as its
        second positional argument.

        Args:
            range_table(list<int>): The range of value in each position of tokens generated by current controller. The range of tokens[i] is [0, range_table[i]).
            init_tokens(list<int>): The initial tokens.
            constrain_func(function): The callback function used to check whether the tokens meet constraint. None means there is no constraint. Default: None.
        """
        self._range_table = range_table
        self._constrain_func = constrain_func
        self._tokens = init_tokens
        self._iter = 0

    def update(self, tokens, reward):
        """
        Update the controller according to latest tokens and reward.

        A better reward is always accepted; a worse or equal one is accepted
        with probability exp((reward - current_reward) / temperature), where
        the temperature is init_temperature * reduce_rate ** iteration.

        Args:
            tokens(list<int>): The tokens generated in last step.
            reward(float): The reward of tokens.
        """
        self._iter += 1
        temperature = self._init_temperature * self._reduce_rate**self._iter
        if reward > self._reward:
            accepted = True
        elif temperature != 0:
            accepted = np.random.random() <= math.exp(
                (reward - self._reward) / temperature)
        else:
            # The temperature has underflowed to exactly zero; dividing by
            # it would raise ZeroDivisionError. Use the limit of the
            # acceptance probability instead: 1 for an equal reward, 0 for
            # a worse one.
            accepted = reward == self._reward
        if accepted:
            self._reward = reward
            self._tokens = tokens
        if reward > self._max_reward:
            self._max_reward = reward
            self._best_tokens = tokens
        _logger.info("iter: {}; max_reward: {}; best_tokens: {}".format(
            self._iter, self._max_reward, self._best_tokens))
        _logger.info("current_reward: {}; current tokens: {}".format(
            self._reward, self._tokens))

    def next_tokens(self, control_token=None):
        """
        Get next tokens.

        One randomly chosen position of the base tokens is moved to a
        different value within its range. If a constraint callback is set
        and rejects the result, up to `max_iter_number` further single
        position mutations of the base tokens are tried; the last candidate
        is returned even if it still violates the constraint.

        Args:
            control_token(list<int>): Tokens to mutate instead of the
                controller's current tokens. A falsy value (None or an empty
                list) selects the current tokens. Default: None.

        Returns:
            list<int>: The new tokens (a copy; the base tokens are untouched).
        """
        if control_token:
            tokens = control_token[:]
        else:
            tokens = self._tokens
        new_tokens = tokens[:]
        index = int(len(self._range_table) * np.random.random())
        span = self._range_table[index]
        # A range of exactly 1 has a single legal value, so there is nothing
        # to move to (and np.random.randint(0) would raise ValueError).
        # Other invalid ranges still raise as before.
        if span != 1:
            # Adding an offset in [1, span - 1] modulo span guarantees the
            # new value differs from the old one.
            new_tokens[index] = (
                new_tokens[index] + np.random.randint(span - 1) + 1) % span
        _logger.info("change index[{}] from {} to {}".format(index, tokens[
            index], new_tokens[index]))
        if self._constrain_func is None:
            return new_tokens
        for _ in range(self._max_iter_number):
            if self._constrain_func(new_tokens):
                break
            # Retry from the base tokens with a fresh random position/value.
            index = int(len(self._range_table) * np.random.random())
            new_tokens = tokens[:]
            new_tokens[index] = np.random.randint(self._range_table[index])
        return new_tokens
......@@ -138,41 +138,6 @@ if(LINUX AND WITH_MKLDNN)
# Models should be already downloaded for INT8v2 unit tests
set(INT8_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
set(INT8_IC_TEST_FILE "test_mkldnn_int8_quantization_strategy.py")
set(INT8_IC_TEST_FILE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${INT8_IC_TEST_FILE}")
# googlenet int8
set(INT8_GOOGLENET_MODEL_DIR "${INT8_INSTALL_DIR}/googlenet")
inference_analysis_python_api_int8_test_custom_warmup_batch_size(test_slim_int8_googlenet ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH} 10)
# mobilenet int8
set(INT8_MOBILENET_MODEL_DIR "${INT8_INSTALL_DIR}/mobilenetv1")
inference_analysis_python_api_int8_test(test_slim_int8_mobilenet ${INT8_MOBILENET_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
inference_analysis_python_api_int8_test_mkldnn(test_slim_int8_mobilenet_mkldnn ${INT8_MOBILENET_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
# The WITH_SLIM_MKLDNN_FULL_TEST flag is added temporarily so that QA can run the following UTs locally;
# they take too much time to run in the CI tests.
if (WITH_SLIM_MKLDNN_FULL_TEST)
# resnet50 int8
set(INT8_RESNET50_MODEL_DIR "${INT8_INSTALL_DIR}/resnet50")
inference_analysis_python_api_int8_test(test_slim_int8_resnet50 ${INT8_RESNET50_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
# mobilenetv2 int8
set(INT8_MOBILENETV2_MODEL_DIR "${INT8_INSTALL_DIR}/mobilenetv2")
inference_analysis_python_api_int8_test(test_slim_int8_mobilenetv2 ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
# resnet101 int8
set(INT8_RESNET101_MODEL_DIR "${INT8_INSTALL_DIR}/resnet101")
inference_analysis_python_api_int8_test(test_slim_int8_resnet101 ${INT8_RESNET101_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
# vgg16 int8
set(INT8_VGG16_MODEL_DIR "${INT8_INSTALL_DIR}/vgg16")
inference_analysis_python_api_int8_test(test_slim_int8_vgg16 ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
# vgg19 int8
set(INT8_VGG19_MODEL_DIR "${INT8_INSTALL_DIR}/vgg19")
inference_analysis_python_api_int8_test(test_slim_int8_vgg19 ${INT8_VGG19_MODEL_DIR} ${IMAGENET_DATA_PATH} ${INT8_IC_TEST_FILE_PATH})
endif()
#### QUANT & INT8 comparison python api tests
......
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
controllers:
sa_controller:
class: 'SAController'
reduce_rate: 0.9
init_temperature: 1024
max_iter_number: 300
strategies:
auto_pruning_strategy:
class: 'AutoPruneStrategy'
pruner: 'pruner_1'
controller: 'sa_controller'
start_epoch: 0
end_epoch: 2
max_ratio: 0.7
min_ratio: 0.5
pruned_params: '.*_sep_weights'
metric_name: 'acc_top5'
compressor:
epoch: 2
checkpoint_path: './checkpoints_auto_pruning/'
strategies:
- auto_pruning_strategy
version: 1.0
compressor:
epoch: 1
checkpoint_path: './checkpoints/'
#start_epoch: The 'on_epoch_begin' function will be called in start_epoch. default: 0.
#end_epoch: The 'on_epoch_end' function will be called in end_epoch. default: 10.
#delta_rate: The delta used to generate ratios when calculating sensitivities.
#target_ratio: The flops ratio to be pruned from current model.
#metric_name: The metric used to evaluate the model.
#pruned_params: The pattern str to match the parameter names to be pruned.
#sensitivities_file: The sensitivities file.
#num_steps: The number of pruning steps.
#eval_rate: The rate of sampled data used to calculate sensitivities.
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
sensitive_pruning_strategy:
class: 'SensitivePruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
delta_rate: 0.1
target_ratio: 0.3
num_steps: 1
eval_rate: 0.5
pruned_params: '.*_sep_weights'
sensitivities_file: 'mobilenet_acc_top1_sensitive.data'
metric_name: 'acc_top1'
compressor:
epoch: 120
checkpoint_path: './checkpoints/'
strategies:
- sensitive_pruning_strategy
#start_epoch(int): The epoch when to merge student graph and teacher graph for
# distillation training. default: 0
#
#end_epoch(int): The epoch when to finish distillation training. default: 0
#
#student_feature_map(str): The name of feature map from student network.
#
#teacher_feature_map(str): The name of feature map from teacher network.
# Its shape should be the same as that of the student feature map.
#
#student_pairs(list<tuple>): Each tuple, with two variable names, in student_pairs indicates
# a section in student network. The variables in a tuple should
# have the same feature map size.
#
#teacher_pairs(list<tuple>): Each tuple, with two variable names, in teacher_pairs indicates
# a section in teacher network. The variables in a tuple should
# have the same feature map size. The variable named teacher_pairs[i][j]
# should have the same channel number as the variable named
# student_pairs[i][j].
#
#distillation_loss_weight(float): The weight of the loss.
version: 1.0
distillers:
fsp_distiller:
class: 'FSPDistiller'
# teacher_pairs: [['teacher_depthwise_conv2d_1.tmp_0', 'teacher_conv2d_3.tmp_0']]
# student_pairs: [['student_depthwise_conv2d_1.tmp_0', 'student_conv2d_3.tmp_0']]
teacher_pairs: [['teacher_conv2_1_dw.tmp_0', 'teacher_conv1.tmp_0']]
student_pairs: [['student_conv2_1_dw.tmp_0', 'student_conv1.tmp_0']]
distillation_loss_weight: 1
l2_distiller:
class: 'L2Distiller'
teacher_feature_map: 'teacher.tmp_1'
student_feature_map: 'student.tmp_1'
distillation_loss_weight: 1
soft_label_distiller:
class: 'SoftLabelDistiller'
student_temperature: 1.0
teacher_temperature: 1.0
teacher_feature_map: 'teacher.tmp_2'
student_feature_map: 'student.tmp_2'
distillation_loss_weight: 0.001
strategies:
distillation_strategy:
class: 'DistillationStrategy'
distillers: ['fsp_distiller', 'l2_distiller', 'soft_label_distiller']
start_epoch: 0
end_epoch: 1
compressor:
epoch: 1
checkpoint_path: './distillation_checkpoints/'
strategies:
- distillation_strategy
#start_epoch: The 'on_epoch_begin' function will be called in start_epoch. default: 0.
#end_epoch: The 'on_epoch_end' function will be called in end_epoch. default: 10.
#delta_rate: The delta used to generate ratios when calculating sensitivities.
#target_ratio: The flops ratio to be pruned from current model.
#metric_name: The metric used to evaluate the model.
#pruned_params: The pattern str to match the parameter names to be pruned.
#sensitivities_file: The sensitivities file.
#num_steps: The number of pruning steps.
#eval_rate: The rate of sampled data used to calculate sensitivities.
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
sensitive_pruning_strategy:
class: 'SensitivePruneStrategy'
pruner: 'pruner_1'
start_epoch: 1
delta_rate: 0.2
target_ratio: 0.08
num_steps: 1
eval_rate: 0.5
pruned_params: '_conv6_sep_weights'
sensitivities_file: 'mobilenet_acc_top1_sensitive.data'
metric_name: 'acc_top1'
compressor:
epoch: 2
checkpoint_path: './checkpoints_pruning/'
strategies:
- sensitive_pruning_strategy
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
uniform_pruning_strategy:
class: 'UniformPruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
target_ratio: 0.5
pruned_params: 'conv.*'
metric_name: 'acc_top1'
compressor:
epoch: 2
checkpoint_path: './checkpoints_uniform_restore_tmp/'
strategies:
- uniform_pruning_strategy
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
uniform_pruning_strategy:
class: 'UniformPruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
target_ratio: 0.5
pruned_params: 'conv.*'
metric_name: 'acc_top1'
compressor:
epoch: 1
checkpoint_path: './checkpoints_uniform_restore/'
strategies:
- uniform_pruning_strategy
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
uniform_pruning_strategy:
class: 'UniformPruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
target_ratio: 0.5
pruned_params: 'conv.*'
metric_name: 'acc_top1'
compressor:
epoch: 2
checkpoint_path: './checkpoints_uniform_restore/'
strategies:
- uniform_pruning_strategy
version: 1.0
controllers:
sa_controller:
class: 'SAController'
reduce_rate: 0.9
init_temperature: 1024
max_iter_number: 300
strategies:
light_nas_strategy:
class: 'LightNASStrategy'
controller: 'sa_controller'
target_flops: 629145600
target_latency: 1
end_epoch: 2
retrain_epoch: 1
metric_name: 'acc_top1'
is_server: 1
max_client_num: 100
search_steps: 2
compressor:
epoch: 2
strategies:
- light_nas_strategy
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.contrib.slim.nas import SearchSpace
from light_nasnet import LightNASNet
import paddle.fluid as fluid
import paddle
import json
import random
# Module-level training hyperparameters. build_program() copies them into
# the model's params dict before calling optimizer_setting().
total_images = 1281167
lr = 0.1
num_epochs = 1
# Also used by LightNASSpace.create_net() as the reader batch size.
batch_size = 256
# Selects the branch taken in optimizer_setting().
lr_strategy = "cosine_decay"
l2_decay = 4e-5
momentum_rate = 0.9
# Per-sample image shape; build_program() prepends a -1 batch dimension.
image_shape = [1, 28, 28]
# Names exported by `from <this module> import *`.
__all__ = ['LightNASSpace']
# Candidate values for each searchable setting. The per-stage tables
# (NAS_FILTER_SIZE, NAS_LAYERS_NUMBER) hold one row per searched stage.
NAS_FILTER_SIZE = [
    [18, 24, 30],
    [24, 32, 40],
    [48, 64, 80],
    [72, 96, 120],
    [120, 160, 192],
]
NAS_LAYERS_NUMBER = [
    [1, 2, 3],
    [2, 3, 4],
    [3, 4, 5],
    [2, 3, 4],
    [2, 3, 4],
]
NAS_KERNEL_SIZE = [3, 5]
NAS_FILTERS_MULTIPLIER = [3, 4, 5, 6]
NAS_SHORTCUT = [0, 1]
NAS_SE = [0, 1]


def get_bottleneck_params_list(var):
    """Decode search tokens into a flat list of bottleneck settings.

    The result holds 7 rows of 7 values each, stored flat. Rows 1-5 are
    filled from `var`, six tokens per row, in the order: filters multiplier,
    filter size, layers number, kernel size, shortcut, SE. Column 3 of each
    row, and the whole first and last rows, keep their template values.

    Args:
        var: list, the tokens; each token indexes into the matching table.
    Returns:
        list, the flat bottleneck_params_list (49 values).
    """
    flat = [
        1, 16, 1, 1, 3, 1, 0,
        6, 24, 2, 2, 3, 1, 0,
        6, 32, 3, 2, 3, 1, 0,
        6, 64, 4, 2, 3, 1, 0,
        6, 96, 3, 1, 3, 1, 0,
        6, 160, 3, 2, 3, 1, 0,
        6, 320, 1, 1, 3, 1, 0,
    ]
    for stage in range(5):
        row_start = 7 * (stage + 1)
        token_start = 6 * stage
        # (column inside the row, table the token selects from), listed in
        # token order.
        slots = (
            (0, NAS_FILTERS_MULTIPLIER),
            (1, NAS_FILTER_SIZE[stage]),
            (2, NAS_LAYERS_NUMBER[stage]),
            (4, NAS_KERNEL_SIZE),
            (5, NAS_SHORTCUT),
            (6, NAS_SE),
        )
        for offset, (column, table) in enumerate(slots):
            flat[row_start + column] = table[var[token_start + offset]]
    return flat
class LightNASSpace(SearchSpace):
    """Search space that builds LightNASNet programs fed by MNIST readers."""

    def __init__(self):
        super(LightNASSpace, self).__init__()

    def init_tokens(self):
        """Get init tokens in search space.

        Returns:
            list<int>: 30 tokens, six per searched stage.
        """
        return [
            0, 1, 2, 0, 1, 0, 0, 2, 1, 1, 1, 0, 3, 2, 0, 1, 1, 0, 3, 1, 0, 0, 1,
            0, 3, 2, 2, 1, 1, 0
        ]

    def range_table(self):
        """Get range table of current search space.

        Returns:
            list<int>: The number of choices for each of the 30 tokens.
        """
        # Six entries per stage, in the order get_bottleneck_params_list()
        # consumes the tokens:
        # [NAS_FILTERS_MULTIPLIER, NAS_FILTER_SIZE, NAS_LAYERS_NUMBER,
        #  NAS_KERNEL_SIZE, NAS_SHORTCUT, NAS_SE]
        return [
            4, 3, 3, 2, 2, 2, 4, 3, 3, 2, 2, 2, 4, 3, 3, 2, 2, 2, 4, 3, 3, 2, 2,
            2, 4, 3, 3, 2, 2, 2
        ]

    def get_model_latency(self, program):
        """Get model latency according to program.

        Returns a random number since it's only for testing.

        Args:
            program(Program): The program to get latency (unused).
        Return:
            (int): A random value, either 1 or 2.
        """
        return random.randint(1, 2)

    def create_net(self, tokens=None):
        """Create a network for training by tokens.

        Args:
            tokens(list<int>): The tokens describing the network. None means
                init_tokens(). Default: None.
        Returns:
            tuple: (startup_prog, train_prog, test_prog,
                (train_cost, train_acc1, train_acc5, global_lr),
                (test_cost, test_acc1, test_acc5),
                train_py_reader, test_py_reader).
        """
        if tokens is None:
            tokens = self.init_tokens()
        bottleneck_params_list = get_bottleneck_params_list(tokens)

        startup_prog = fluid.Program()
        train_prog = fluid.Program()
        test_prog = fluid.Program()
        train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
            is_train=True,
            main_prog=train_prog,
            startup_prog=startup_prog,
            bottleneck_params_list=bottleneck_params_list)
        test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
            is_train=False,
            main_prog=test_prog,
            startup_prog=startup_prog,
            bottleneck_params_list=bottleneck_params_list)
        test_prog = test_prog.clone(for_test=True)

        # Floor division keeps the batch size an int on Python 3 as well
        # (the original `/ 1` produced a float there).
        train_batch_size = batch_size // 1
        test_batch_size = batch_size
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(),
            batch_size=train_batch_size,
            drop_last=True)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=test_batch_size)

        with fluid.program_guard(train_prog, startup_prog):
            train_py_reader.decorate_paddle_reader(train_reader)
        with fluid.program_guard(test_prog, startup_prog):
            test_py_reader.decorate_paddle_reader(test_reader)

        return startup_prog, train_prog, test_prog, (
            train_cost, train_acc1, train_acc5,
            global_lr), (test_cost, test_acc1,
                         test_acc5), train_py_reader, test_py_reader
def build_program(is_train,
                  main_prog,
                  startup_prog,
                  bottleneck_params_list=None):
    """Build a LightNASNet train or test program inside `main_prog`.

    Args:
        is_train(bool): Whether to append the optimizer to the program.
        main_prog(Program): The program that receives the network.
        startup_prog(Program): The matching startup program.
        bottleneck_params_list(list): Flat bottleneck settings passed on to
            net_config(). Default: None.
    Returns:
        tuple: (py_reader, avg_cost, acc_top1, acc_top5), followed by the
        optimizer's global learning rate when `is_train` is true.
    """
    with fluid.program_guard(main_prog, startup_prog):
        py_reader = fluid.layers.py_reader(
            capacity=16,
            shapes=[[-1] + image_shape, [-1, 1]],
            lod_levels=[0, 0],
            dtypes=["float32", "int64"],
            use_double_buffer=False)
        with fluid.unique_name.guard():
            image, label = fluid.layers.read_file(py_reader)
            model = LightNASNet()
            avg_cost, acc_top1, acc_top5 = net_config(
                image,
                label,
                model,
                class_dim=10,
                bottleneck_params_list=bottleneck_params_list,
                scale_loss=1.0)
            for metric in (avg_cost, acc_top1, acc_top5):
                metric.persistable = True
            if is_train:
                # Overlay the module-level hyperparameters on the model's
                # params before building the optimizer.
                params = model.params
                params.update({
                    "total_images": total_images,
                    "lr": lr,
                    "num_epochs": num_epochs,
                    "l2_decay": l2_decay,
                    "momentum_rate": momentum_rate,
                })
                strategy = params["learning_strategy"]
                strategy["batch_size"] = batch_size
                strategy["name"] = lr_strategy
                optimizer = optimizer_setting(params)
                optimizer.minimize(avg_cost)
                global_lr = optimizer._global_learning_rate()

    outputs = (py_reader, avg_cost, acc_top1, acc_top5)
    if is_train:
        return outputs + (global_lr, )
    return outputs
def net_config(image,
               label,
               model,
               class_dim=1000,
               bottleneck_params_list=None,
               scale_loss=1.0):
    """Attach the loss and accuracy ops to the model's output.

    Args:
        image: Variable, the network input.
        label: Variable, the labels.
        model: The object whose `net` method builds the network.
        class_dim: int, passed to `model.net` as class_dim.
        bottleneck_params_list: list, flat settings; regrouped here into
            rows of 7 before being handed to `model.net`.
        scale_loss: float, the mean loss is multiplied by it only when it
            is greater than 1.
    Returns:
        tuple: (avg_cost, acc_top1, acc_top5).
    """
    row_starts = range(0, len(bottleneck_params_list), 7)
    rows = [bottleneck_params_list[start:start + 7] for start in row_starts]
    logits = model.net(input=image,
                       bottleneck_params_list=rows,
                       class_dim=class_dim)
    cost, pred = fluid.layers.softmax_with_cross_entropy(
        logits, label, return_softmax=True)
    avg_cost = fluid.layers.mean(x=cost)
    if scale_loss > 1:
        avg_cost = avg_cost * float(scale_loss)
    acc_top1 = fluid.layers.accuracy(input=pred, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=pred, label=label, k=5)
    return avg_cost, acc_top1, acc_top5
def optimizer_setting(params):
    """Build the optimizer selected by params["learning_strategy"]["name"].

    Supported names: "piecewise_decay", "cosine_decay",
    "cosine_warmup_decay", "linear_decay" and "adam". Any other name gives
    Momentum with the constant learning rate params["lr"]. Every branch
    except "adam" uses Momentum with L2 regularization.

    Args:
        params: dict, params. Always read: "learning_strategy", "l2_decay",
            "momentum_rate". Read depending on the strategy: "lr",
            "num_epochs", "total_images" (optional) and the
            "batch_size"/"epochs" entries of the learning strategy.
    Returns:
        The fluid optimizer.
    """
    # Local import: this module does not import math at file level, so the
    # "cosine_warmup_decay" branch used to fail with NameError.
    import math

    # Fallback for a missing "total_images". The original referenced an
    # undefined IMAGENET1000 here; this is the same value as the
    # module-level `total_images`.
    default_total_images = 1281167

    ls = params["learning_strategy"]
    l2_decay = params["l2_decay"]
    momentum_rate = params["momentum_rate"]
    name = ls["name"]
    total_images = params.get("total_images", default_total_images)

    def momentum(learning_rate):
        # Momentum + L2 decay, shared by every branch except "adam".
        return fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=momentum_rate,
            regularization=fluid.regularizer.L2Decay(l2_decay))

    if name == "piecewise_decay":
        batch_size = ls["batch_size"]
        step = int(total_images / batch_size + 1)
        bd = [step * e for e in ls["epochs"]]
        base_lr = params["lr"]
        # One value per interval: the rate drops 10x at each boundary.
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
        optimizer = momentum(
            fluid.layers.piecewise_decay(
                boundaries=bd, values=lr))
    elif name == "cosine_decay":
        batch_size = ls["batch_size"]
        step = int(total_images / batch_size + 1)
        lr = params["lr"]
        num_epochs = params["num_epochs"]
        optimizer = momentum(
            fluid.layers.cosine_decay(
                learning_rate=lr, step_each_epoch=step, epochs=num_epochs))
    elif name == "cosine_warmup_decay":
        batch_size = ls["batch_size"]
        step = int(math.ceil(float(total_images) / batch_size))
        lr = params["lr"]
        num_epochs = params["num_epochs"]
        # NOTE(review): cosine_decay_with_warmup is neither defined nor
        # imported in this module, so this branch still raises NameError
        # unless the name is provided elsewhere - confirm and import it.
        optimizer = momentum(
            cosine_decay_with_warmup(
                learning_rate=lr, step_each_epoch=step, epochs=num_epochs))
    elif name == "linear_decay":
        batch_size = ls["batch_size"]
        num_epochs = params["num_epochs"]
        start_lr = params["lr"]
        end_lr = 0
        total_step = int((total_images / batch_size) * num_epochs)
        lr = fluid.layers.polynomial_decay(
            start_lr, total_step, end_lr, power=1)
        optimizer = momentum(lr)
    elif name == "adam":
        lr = params["lr"]
        optimizer = fluid.optimizer.Adam(learning_rate=lr)
    else:
        lr = params["lr"]
        optimizer = momentum(lr)
    return optimizer
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""LightNASNet."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
__all__ = ['LightNASNet']
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
}
}
class LightNASNet(object):
"""LightNASNet."""
def __init__(self):
self.params = train_parameters
def net(self, input, bottleneck_params_list=None, class_dim=1000,
scale=1.0):
"""Build network.
Args:
input: Variable, input.
class_dim: int, class dim.
scale: float, scale.
Returns:
Variable, network output.
"""
if bottleneck_params_list is None:
# MobileNetV2
# bottleneck_params_list = [
# (1, 16, 1, 1, 3, 1, 0),
# (6, 24, 2, 2, 3, 1, 0),
# (6, 32, 3, 2, 3, 1, 0),
# (6, 64, 4, 2, 3, 1, 0),
# (6, 96, 3, 1, 3, 1, 0),
# (6, 160, 3, 2, 3, 1, 0),
# (6, 320, 1, 1, 3, 1, 0),
# ]
bottleneck_params_list = [
(1, 16, 1, 1, 3, 1, 0),
(3, 24, 3, 2, 3, 1, 0),
(3, 40, 3, 2, 5, 1, 0),
(6, 80, 3, 2, 5, 1, 0),
(6, 96, 2, 1, 3, 1, 0),
(6, 192, 4, 2, 5, 1, 0),
(6, 320, 1, 1, 3, 1, 0),
]
#conv1
input = self.conv_bn_layer(
input,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
padding=1,
if_act=True,
name='conv1_1')
# bottleneck sequences
i = 1
in_c = int(32 * scale)
for layer_setting in bottleneck_params_list:
t, c, n, s, k, ifshortcut, ifse = layer_setting
i += 1
input = self.invresi_blocks(
input=input,
in_channel=in_c,
expansion=t,
out_channel=int(c * scale),
num_layers=n,
stride=s,
filter_size=k,
shortcut=ifshortcut,
squeeze=ifse,
name='conv' + str(i))
in_c = int(c * scale)
#last_conv
input = self.conv_bn_layer(
input=input,
num_filters=int(1280 * scale) if scale > 1.0 else 1280,
filter_size=1,
stride=1,
padding=0,
if_act=True,
name='conv9')
input = fluid.layers.pool2d(
input=input,
pool_size=7,
pool_stride=1,
pool_type='avg',
global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
param_attr=ParamAttr(name='fc10_weights'),
bias_attr=ParamAttr(name='fc10_offset'))
return output
def conv_bn_layer(self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
if_act=True,
name=None,
use_cudnn=True):
"""Build convolution and batch normalization layers.
Args:
input: Variable, input.
filter_size: int, filter size.
num_filters: int, number of filters.
stride: int, stride.
padding: int, padding.
num_groups: int, number of groups.
if_act: bool, whether using activation.
name: str, name.
use_cudnn: bool, whether use cudnn.
Returns:
Variable, layers output.
"""
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm(
input=conv,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
if if_act:
return fluid.layers.relu6(bn)
else:
return bn
def shortcut(self, input, data_residual):
"""Build shortcut layer.
Args:
input: Variable, input.
data_residual: Variable, residual layer.
Returns:
Variable, layer output.
"""
return fluid.layers.elementwise_add(input, data_residual)
def squeeze_excitation(self,
input,
num_channels,
reduction_ratio,
name=None):
"""Build squeeze excitation layers.
Args:
input: Variable, input.
num_channels: int, number of channels.
reduction_ratio: float, reduction ratio.
name: str, name.
Returns:
Variable, layers output.
"""
pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
squeeze = fluid.layers.fc(
input=pool,
size=num_channels // reduction_ratio,
act='relu',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_sqz_weights'),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0)
excitation = fluid.layers.fc(
input=squeeze,
size=num_channels,
act='sigmoid',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_exc_weights'),
bias_attr=ParamAttr(name=name + '_exc_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return scale
def inverted_residual_unit(self,
input,
num_in_filter,
num_filters,
ifshortcut,
ifse,
stride,
filter_size,
expansion_factor,
reduction_ratio=4,
name=None):
"""Build inverted residual unit.
Args:
input(Variable): Theinput.
num_in_filter(int): The number of input filters.
num_filters(int): The number of filters.
ifshortcut(bool): Whether to use shortcut.
stride(int): The stride.
filter_size(int): The filter size.
padding(int): The padding.
expansion_factor(float): Expansion factor.
name(str): The name.
Returns:
Variable, layers output.
"""
num_expfilter = int(round(num_in_filter * expansion_factor))
channel_expand = self.conv_bn_layer(
input=input,
num_filters=num_expfilter,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=True,
name=name + '_expand')
bottleneck_conv = self.conv_bn_layer(
input=channel_expand,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
padding=int((filter_size - 1) / 2),
num_groups=num_expfilter,
if_act=True,
name=name + '_dwise',
use_cudnn=False)
linear_out = self.conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
num_groups=1,
if_act=False,
name=name + '_linear')
out = linear_out
if ifshortcut:
out = self.shortcut(input=input, data_residual=out)
if ifse:
scale = self.squeeze_excitation(
input=linear_out,
num_channels=num_filters,
reduction_ratio=reduction_ratio,
name=name + '_fc')
out = fluid.layers.elementwise_add(x=out, y=scale, act='relu')
return out
def invresi_blocks(self,
                   input,
                   in_channel,
                   expansion,
                   out_channel,
                   num_layers,
                   stride,
                   filter_size,
                   shortcut,
                   squeeze,
                   name=None):
    """Stack ``num_layers`` inverted residual units.

    The first unit is always built; it maps ``in_channel`` to
    ``out_channel`` with the given ``stride`` and never uses a shortcut.
    Every following unit keeps ``out_channel`` channels, uses stride 1
    and follows the ``shortcut`` flag.

    Args:
        input(Variable): The input feature map.
        in_channel(int): The number of input channels.
        expansion(float): Expansion factor.
        out_channel(int): The number of output channels.
        num_layers(int): The number of units in the stack.
        stride(int): The stride of the first unit.
        filter_size(int): The size of filter.
        shortcut(bool): Whether units after the first use a shortcut.
        squeeze(bool): Whether to add squeeze excitation layers.
        name(str): Prefix for unit names; unit ``k`` (1-based) is named
            ``name + '_' + str(k)``, so a string must be passed.

    Returns:
        Variable, layers output.
    """

    def build_unit(feature, in_filters, use_shortcut, unit_stride, index):
        # All units share the output width, SE flag, filter size and
        # expansion factor; only the arguments above vary.
        return self.inverted_residual_unit(
            input=feature,
            num_in_filter=in_filters,
            num_filters=out_channel,
            ifshortcut=use_shortcut,
            ifse=squeeze,
            stride=unit_stride,
            filter_size=filter_size,
            expansion_factor=expansion,
            name=name + '_' + str(index))

    block = build_unit(input, in_channel, False, stride, 1)
    for index in range(2, num_layers + 1):
        block = build_unit(block, out_channel, shortcut, 1, index)
    return block
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
__all__ = ['MobileNet']
# Default settings exposed by MobileNet instances through `self.params`.
train_parameters = {
    # presumably a (channels, height, width) input shape -- TODO confirm
    "input_size": [3, 224, 224],
    # presumably per-channel normalisation statistics -- TODO confirm
    "input_mean": [0.485, 0.456, 0.406],
    "input_std": [0.229, 0.224, 0.225],
    "learning_strategy": {
        "name": "piecewise_decay",
        "batch_size": 256,
        "epochs": [30, 60, 90],
        # NOTE(review): looks like one value per interval delimited by
        # "epochs" -- verify against the code that reads this.
        "steps": [0.1, 0.01, 0.001, 0.0001],
    },
}
class MobileNet():
    """Image classifier assembled from conv+batch-norm building blocks.

    The network is a plain convolution followed by thirteen
    ``depthwise_separable`` blocks, global average pooling and a softmax
    fully-connected layer. Every parameter name is prefixed with the
    ``name`` given to the constructor.
    """

    def __init__(self, name=""):
        # `train_parameters` is the module-level settings dict; it is
        # shared (not copied) between instances.
        self.params = train_parameters
        self.name = name

    def net(self, input, class_dim=1000, scale=1.0):
        """Build the whole network and return the softmax output.

        Args:
            input(Variable): The input image variable.
            class_dim(int): Size of the final fully-connected layer.
            scale(float): Multiplier applied to the filter and group
                counts of every convolution.

        Returns:
            Variable, output of the final softmax fc layer.
        """
        prefix = self.name

        # Stem: regular 3x3 convolution with stride 2.
        feature = self.conv_bn_layer(
            input,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1,
            name=prefix + "_conv1")

        # (num_filters1, num_filters2, num_groups, stride, name suffix),
        # listed in the order the blocks are built.
        block_specs = [
            (32, 64, 32, 1, "_conv2_1"),
            (64, 128, 64, 2, "_conv2_2"),
            (128, 128, 128, 1, "_conv3_1"),
            (128, 256, 128, 2, "_conv3_2"),
            (256, 256, 256, 1, "_conv4_1"),
            (256, 512, 256, 2, "_conv4_2"),
        ]
        # Five identical 512-channel blocks: _conv5_1 ... _conv5_5.
        block_specs += [(512, 512, 512, 1, "_conv5" + "_" + str(k))
                        for k in range(1, 6)]
        block_specs += [
            (512, 1024, 512, 2, "_conv5_6"),
            (1024, 1024, 1024, 1, "_conv6"),
        ]

        for filters_dw, filters_pw, groups, block_stride, suffix in block_specs:
            feature = self.depthwise_separable(
                feature,
                num_filters1=filters_dw,
                num_filters2=filters_pw,
                num_groups=groups,
                stride=block_stride,
                scale=scale,
                name=prefix + suffix)

        pooled = fluid.layers.pool2d(
            input=feature,
            pool_size=0,
            pool_stride=1,
            pool_type='avg',
            global_pooling=True)

        fc_weights = ParamAttr(
            initializer=MSRA(), name=prefix + "_fc7_weights")
        fc_bias = ParamAttr(name=prefix + "_fc7_offset")
        return fluid.layers.fc(input=pooled,
                               size=class_dim,
                               act='softmax',
                               param_attr=fc_weights,
                               bias_attr=fc_bias,
                               name=prefix)

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      channels=None,
                      num_groups=1,
                      act='relu',
                      use_cudnn=True,
                      name=None):
        """Build a bias-free conv2d followed by batch normalisation.

        Args:
            input(Variable): The input feature map.
            filter_size(int): Convolution filter size.
            num_filters(int): Number of convolution filters.
            stride(int): Convolution stride.
            padding(int): Convolution padding.
            channels(int): Accepted but not used by this method.
            num_groups(int): Number of convolution groups.
            act(str): Activation applied by the batch-norm layer.
            use_cudnn(bool): Forwarded to ``conv2d``.
            name(str): Layer name and parameter-name prefix. Suffixes are
                appended with ``+``, so a string must be passed even
                though the default is None.

        Returns:
            Variable, output of the batch-norm layer.
        """
        conv_weights = ParamAttr(initializer=MSRA(), name=name + "_weights")
        conv_out = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=conv_weights,
            name=name,
            bias_attr=False)

        bn_prefix = name + "_bn"
        normalized = fluid.layers.batch_norm(
            input=conv_out,
            act=act,
            name=name,
            param_attr=ParamAttr(name=bn_prefix + "_scale"),
            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
            moving_mean_name=bn_prefix + '_mean',
            moving_variance_name=bn_prefix + '_variance')
        return normalized

    def depthwise_separable(self,
                            input,
                            num_filters1,
                            num_filters2,
                            num_groups,
                            stride,
                            scale,
                            name=None):
        """Build a grouped 3x3 conv+bn followed by a 1x1 conv+bn.

        Args:
            input(Variable): The input feature map.
            num_filters1(int): Filter count of the 3x3 layer before
                scaling.
            num_filters2(int): Filter count of the 1x1 layer before
                scaling.
            num_groups(int): Group count of the 3x3 layer before scaling.
            stride(int): Stride of the 3x3 layer.
            scale(float): Multiplier for the filter and group counts;
                results are truncated with ``int``.
            name(str): Name prefix; ``_dw`` and ``_sep`` are appended for
                the two layers, so a string must be passed.

        Returns:
            Variable, output of the 1x1 layer.
        """
        grouped = self.conv_bn_layer(
            input=input,
            filter_size=3,
            num_filters=int(num_filters1 * scale),
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            use_cudnn=False,
            name=name + "_dw")
        return self.conv_bn_layer(
            input=grouped,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0,
            name=name + "_sep")
#start_epoch(int): The epoch to insert quantization operators. default: 0
#
#end_epoch(int): The epoch to save inference model. default: 0
#
#float_model_save_path(str): The path to save model with float weights.
# None means it doesn't save float model. default: None.
#
#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
# None means it doesn't save mobile model. default: None.
#
#int8_model_save_path(str): The path to save model with int8_t weight.
# None means it doesn't save int8 model. default: None.
#
#activation_bits(int): quantization bit number for activation. default: 8.
#
#weight_bits(int): quantization bit number for weights. The bias is not quantized.
# default: 8.
#
#activation_quantize_type(str): quantization type for activation.
# Supported values are 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
# With 'abs_max', the quantization scale is calculated dynamically at
# each step during both training and testing. With 'range_abs_max', a
# static quantization scale is calculated during training and used in
# inference.
#
#save_in_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
#
#save_out_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
version: 1.0
strategies:
quantization_strategy:
class: 'QuantizationStrategy'
start_epoch: 0
end_epoch: 0
float_model_save_path: './output/float'
mobile_model_save_path: './output/mobile'
int8_model_save_path: './output/int8'
weight_bits: 8
activation_bits: 8
weight_quantize_type: 'abs_max'
activation_quantize_type: 'abs_max'
save_in_nodes: ['image']
save_out_nodes: ['quan.tmp_2']
compressor:
epoch: 1
checkpoint_path: './checkpoints_quan/'
strategies:
- quantization_strategy
#start_epoch(int): The epoch to insert quantization operators. default: 0
#
#end_epoch(int): The epoch to save inference model. default: 0
#
#float_model_save_path(str): The path to save model with float weights.
# None means it doesn't save float model. default: None.
#
#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
# None means it doesn't save mobile model. default: None.
#
#int8_model_save_path(str): The path to save model with int8_t weight.
# None means it doesn't save int8 model. default: None.
#
#activation_bits(int): quantization bit number for activation. default: 8.
#
#weight_bits(int): quantization bit number for weights. The bias is not quantized.
# default: 8.
#
#activation_quantize_type(str): quantization type for activation.
# Supported values are 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
# With 'abs_max', the quantization scale is calculated dynamically at
# each step during both training and testing. With 'range_abs_max', a
# static quantization scale is calculated during training and used in
# inference.
#
#save_in_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
#
#save_out_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
version: 1.0
strategies:
quantization_strategy:
class: 'QuantizationStrategy'
start_epoch: 0
end_epoch: 0
float_model_save_path: './output/float'
mobile_model_save_path: './output/mobile'
int8_model_save_path: './output/int8'
weight_bits: 8
activation_bits: 8
weight_quantize_type: 'abs_max'
activation_quantize_type: 'abs_max'
save_in_nodes: ['image']
save_out_nodes: ['quan.tmp_2']
compressor:
epoch: 2
checkpoint_path: './checkpoints_quan/'
strategies:
- quantization_strategy
#start_epoch(int): The epoch to insert quantization operators. default: 0
#
#end_epoch(int): The epoch to save inference model. default: 0
#
#float_model_save_path(str): The path to save model with float weights.
# None means it doesn't save float model. default: None.
#
#mobile_model_save_path(str): The path to save model for paddle-mobile execution.
# None means it doesn't save mobile model. default: None.
#
#int8_model_save_path(str): The path to save model with int8_t weight.
# None means it doesn't save int8 model. default: None.
#
#activation_bits(int): quantization bit number for activation. default: 8.
#
#weight_bits(int): quantization bit number for weights. The bias is not quantized.
# default: 8.
#
#activation_quantize_type(str): quantization type for activation.
# Supported values are 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
# With 'abs_max', the quantization scale is calculated dynamically at
# each step during both training and testing. With 'range_abs_max', a
# static quantization scale is calculated during training and used in
# inference.
#
#save_in_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
#
#save_out_nodes(list<str>): A list of variable names used to prune graph
# for saving inference model.
version: 1.0
strategies:
quantization_strategy:
class: 'QuantizationStrategy'
start_epoch: 0
end_epoch: 0
float_model_save_path: './output/float'
mobile_model_save_path: './output/mobile'
int8_model_save_path: './output/int8'
weight_bits: 8
activation_bits: 8
weight_quantize_type: 'abs_max'
activation_quantize_type: 'abs_max'
save_in_nodes: ['image']
save_out_nodes: ['quan.tmp_2']
compressor:
epoch: 2
checkpoint_path: './checkpoints_quan_2/'
strategies:
- quantization_strategy
#int8_model_save_path(str): int8_model_save_path is the path used to save an int8 ProgramDesc
# with fp32 weights, which is used for MKL-DNN int8 inference. For post-training quantization,
# MKLDNNPostTrainingQuantStrategy currently only supports converting a fp32 ProgramDesc
# with fp32 weights to an int8 ProgramDesc with fp32 weights. The saved
# int8 ProgramDesc with fp32 weights can only be executed with MKL-DNN enabled.
# None means the int8 ProgramDesc with fp32 weights is not saved. default: None.
#
#fp32_model_path(str): fp32_model_path is used to load an original fp32 ProgramDesc with fp32 weights.
# None means it doesn't have a fp32 ProgramDesc with fp32 weights. default: None.
#
#cpu_math_library_num_threads(int): The number of cpu math library threads which is used on
# MKLDNNPostTrainingQuantStrategy. 1 means it only uses one cpu math library
# thread. default: 1
# Note: Here we set the cpu_math_library_num_threads to 4 which is the maximum number of
# cpu math library threads on CI machine.
#
version: 1.0
strategies:
mkldnn_post_training_strategy:
class: 'MKLDNNPostTrainingQuantStrategy'
int8_model_save_path: 'OUTPUT_PATH'
fp32_model_path: 'MODEL_PATH'
cpu_math_library_num_threads: 4
compressor:
epoch: 0
checkpoint_path: ''
strategies:
- mkldnn_post_training_strategy
# PaddleSlim Post-training quantization (MKL-DNN INT8)
This document describes how to use [PaddleSlim](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) to convert a FP32 ProgramDesc with FP32 weights to an INT8 ProgramDesc with FP32 weights on GoogleNet, MobileNet-V1, MobileNet-V2, ResNet-101, ResNet-50, VGG16 and VGG19. We provide the instructions on how to enable MKL-DNN INT8 calibration in PaddleSlim and show the results of accuracy on all the 7 models as mentioned.
## 0. Prerequisite
You need to install at least PaddlePaddle-1.5 python package `pip install paddlepaddle==1.5`.
## 1. How to generate INT8 ProgramDesc with FP32 weights
You can refer to section 1.2 of the [PaddleSlim](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md) usage doc for details on how to use the PaddleSlim Compressor. However, PaddleSlim Post-training quantization with MKL-DNN INT8 differs in two ways.
* Differences in `paddle.fluid.contrib.slim.Compressor` arguments
The only requirement of PaddleSlim Post-training quantization with MKL-DNN INT8 is the reader of the warmup dataset, so you need to set the other parameters of `paddle.fluid.contrib.slim.Compressor` to None, [] or ''.
```python
com_pass = Compressor(
place=None, # not required, set to None
scope=None, # not required, set to None
train_program=None, # not required, set to None
train_reader=None, # not required, set to None
train_feed_list=[], # not required, set to []
train_fetch_list=[], # not required, set to []
eval_program=None, # not required, set to None
eval_reader=reader, # required, the reader of warmup dataset
eval_feed_list=[], # not required, set to []
eval_fetch_list=[], # not required, set to []
teacher_programs=[], # not required, set to []
checkpoint_path='', # not required, set to ''
train_optimizer=None, # not required, set to None
distiller_optimizer=None # not required, set to None
)
```
* Differences in yaml config
An example yaml config is listed below, for more details, you can refer to [config_mkldnn_int8.yaml](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/fluid/contrib/slim/tests/quantization/config_mkldnn_int8.yaml) which is used in unit test.
```yaml
version: 1.0
strategies:
mkldnn_post_training_strategy:
class: 'MKLDNNPostTrainingQuantStrategy' # required, class name of MKL-DNN INT8 Post-training quantization strategy
int8_model_save_path: 'OUTPUT_PATH' # required, int8 ProgramDesc with fp32 weights
fp32_model_path: 'MODEL_PATH' # required, fp32 ProgramDesc with fp32 weights
cpu_math_library_num_threads: 1 # required, The number of cpu math library threads
compressor:
epoch: 0 # not required, set to 0
checkpoint_path: '' # not required, set to ''
strategies:
- mkldnn_post_training_strategy
```
## 2. How to run INT8 ProgramDesc with fp32 weights
You can load INT8 ProgramDesc with fp32 weights by load_inference_model [API](https://github.com/PaddlePaddle/Paddle/blob/8b50ad80ff6934512d3959947ac1e71ea3fb9ea3/python/paddle/fluid/io.py#L991) and run INT8 inference similar as [FP32](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/object_detection/eval.py "FP32").
```python
[infer_program, feed_dict, fetch_targets] = fluid.io.load_inference_model(model_path, exe)
```
## 3. Result
We provide the results of accuracy measured on Intel(R) Xeon(R) Gold 6271.
>**I. Top-1 Accuracy on Intel(R) Xeon(R) Gold 6271**
>**Dataset: ILSVRC2012 Validation dataset**
| Model | FP32 Accuracy | INT8 Accuracy | Accuracy Diff(FP32-INT8) |
| :----------: | :-------------: | :------------: | :--------------: |
| GoogleNet | 70.50% | 69.81% | 0.69% |
| MobileNet-V1 | 70.78% | 70.42% | 0.36% |
| MobileNet-V2 | 71.90% | 71.35% | 0.55% |
| ResNet-101 | 77.50% | 77.42% | 0.08% |
| ResNet-50 | 76.63% | 76.52% | 0.11% |
| VGG16 | 72.08% | 72.03% | 0.05% |
| VGG19 | 72.57% | 72.55% | 0.02% |
Notes:
* MKL-DNN and MKL are required.
## 4. How to reproduce the results
Three steps to reproduce the above-mentioned accuracy results, and we take GoogleNet benchmark as an example:
* ### Prepare dataset
You can run the following commands to download and preprocess the ILSVRC2012 Validation dataset.
```bash
cd /PATH/TO/PADDLE
python ./paddle/fluid/inference/tests/api/full_ILSVRC2012_val_preprocess.py
```
Then the ILSVRC2012 Validation dataset will be preprocessed and saved by default in `~/.cache/paddle/dataset/int8/download/int8_full_val.bin`
* ### Prepare model
You can run the following commands to download GoogleNet model.
```bash
mkdir -p /PATH/TO/DOWNLOAD/MODEL/
cd /PATH/TO/DOWNLOAD/MODEL/
export MODEL_NAME=GoogleNet
wget http://paddle-inference-dist.bj.bcebos.com/int8/${MODEL_NAME}_int8_model.tar.gz
mkdir -p ${MODEL_NAME}
tar -xvf ${MODEL_NAME}_int8_model.tar.gz -C ${MODEL_NAME}
```
To download and verify all the 7 models, you need to set `MODEL_NAME` to one of the following values in command line:
```text
MODEL_NAME=GoogleNet, mobilenetv1, mobilenet_v2, Res101, resnet50, VGG16, VGG19
```
* ### Commands to reproduce benchmark
You can run `test_mkldnn_int8_quantization_strategy.py` with the following arguments to reproduce the accuracy result on GoogleNet.
``` bash
cd /PATH/TO/PADDLE/python/paddle/fluid/contrib/slim/tests/
python ./test_mkldnn_int8_quantization_strategy.py --infer_model /PATH/TO/DOWNLOAD/MODEL/${MODEL_NAME}/model --infer_data ~/.cache/paddle/dataset/int8/download/int8_full_val.bin --warmup_batch_size 100 --batch_size 1
```
Notes:
* The above commands may take several hours in the prediction stage (which includes both INT8 prediction and FP32 prediction), since there are 50000 pictures to be predicted in `int8_full_val.bin`.
* Running the above command with environment variable `FLAGS_use_mkldnn=true` will make the FP32 part of the test running using MKL-DNN (the INT8 part uses MKL-DNN either way).
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import paddle
import unittest
import paddle.fluid as fluid
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestFilterPruning(unittest.TestCase):
    """Smoke test for the strategy in ./auto_pruning/compress.yaml."""

    def test_compression(self):
        """Run the Compressor on MobileNet with MNIST readers.

        The network is built in the default main program, handed to a
        Compressor configured from './auto_pruning/compress.yaml' and
        run. No assertion is made on the result; the test only checks
        that the run completes. It returns immediately when Paddle is
        not compiled with CUDA.
        """
        if not fluid.core.is_compiled_with_cuda():
            return

        num_classes = 10
        image = fluid.layers.data(
            name='image', shape=[1, 28, 28], dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')

        prediction = MobileNet("auto_pruning").net(input=image,
                                                   class_dim=num_classes)
        acc_top1 = fluid.layers.accuracy(input=prediction, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=prediction, label=label, k=5)

        # The evaluation program is cloned before the loss is added.
        eval_program = fluid.default_main_program().clone(for_test=False)

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=cost)

        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))

        place = fluid.CUDAPlace(0)
        fluid.Executor(place).run(fluid.default_startup_program())

        eval_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=128)
        eval_feeds = [('img', image.name), ('label', label.name)]
        eval_fetches = [('acc_top1', acc_top1.name),
                        ('acc_top5', acc_top5.name)]

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128)
        train_feeds = [('img', image.name), ('label', label.name)]
        train_fetches = [('loss', avg_cost.name)]

        com_pass = Compressor(
            place,
            fluid.global_scope(),
            fluid.default_main_program(),
            train_reader=train_reader,
            train_feed_list=train_feeds,
            train_fetch_list=train_fetches,
            eval_program=eval_program,
            eval_reader=eval_reader,
            eval_feed_list=eval_feeds,
            eval_fetch_list=eval_fetches,
            train_optimizer=optimizer)
        com_pass.config('./auto_pruning/compress.yaml')
        com_pass.run()
if __name__ == '__main__':
unittest.main()
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import paddle
import unittest
import os
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestCompressor(unittest.TestCase):
    """Checks Compressor with a user-supplied evaluation function."""

    def test_eval_func(self):
        """Train a one-layer softmax classifier and evaluate via eval_func.

        After the run, the result registered under 'score' must exceed
        0.9 and the pruned inference model files must exist under
        ./checkpoints/0/eval_model.
        """
        num_classes = 10
        image = fluid.layers.data(
            name='image', shape=[1, 28, 28], dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')

        out = fluid.layers.fc(input=image, size=num_classes)
        out = fluid.layers.softmax(out)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)

        # The evaluation program is cloned before the loss is added.
        val_program = fluid.default_main_program().clone(for_test=False)

        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)

        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))

        place = fluid.CPUPlace()
        fluid.Executor(place).run(fluid.default_startup_program())

        val_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=128)
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128)

        train_feeds = [('img', image.name), ('label', label.name)]
        train_fetches = [('loss', avg_cost.name)]
        eval_feeds = [('img', image.name), ('label', label.name)]
        eval_fetches = [('acc_top1', acc_top1.name)]

        def eval_func(program, scope):
            """Return the mean top-1 accuracy over all validation batches."""
            eval_place = fluid.CPUPlace()
            eval_exe = fluid.Executor(eval_place)
            feeder = fluid.DataFeeder(
                feed_list=[image.name, label.name],
                place=eval_place,
                program=program)
            batch_scores = [
                np.array(
                    eval_exe.run(program=program,
                                 scope=scope,
                                 fetch_list=[acc_top1.name],
                                 feed=feeder.feed(batch)))
                for batch in val_reader()
            ]
            return np.mean(batch_scores)

        com_pass = Compressor(
            place,
            fluid.global_scope(),
            fluid.default_main_program(),
            train_reader=train_reader,
            train_feed_list=train_feeds,
            train_fetch_list=train_fetches,
            eval_program=val_program,
            eval_feed_list=eval_feeds,
            eval_fetch_list=eval_fetches,
            eval_func={"score": eval_func},
            prune_infer_model=[[image.name], [out.name]],
            train_optimizer=optimizer)
        com_pass.config('./configs/compress.yaml')
        com_pass.run()

        eval_results = com_pass.context.eval_results
        self.assertIn('score', eval_results)
        self.assertGreater(float(eval_results['score'][0]), 0.9)

        for model_file in ("./checkpoints/0/eval_model/__model__",
                           "./checkpoints/0/eval_model/__model__.infer",
                           "./checkpoints/0/eval_model/__params__"):
            self.assertTrue(os.path.exists(model_file))
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.contrib.slim.core import ConfigFactory
import unittest
class TestFactory(unittest.TestCase):
    """Checks that ConfigFactory instantiates objects from a YAML file."""

    def test_parse_pruning(self):
        """Parse ./configs/filter_pruning.yaml and inspect two instances.

        Uses ``assertEqual``; the ``assertEquals`` alias is deprecated
        and no longer exists in Python 3.12+.
        """
        factory = ConfigFactory('./configs/filter_pruning.yaml')

        # A pruner requested directly by its key in the config.
        pruner_1 = factory.instance('pruner_1')
        self.assertEqual(pruner_1.pruning_axis['*'], 0)
        self.assertEqual(pruner_1.criterions['*'], 'l1_norm')

        # A strategy, and the pruner reachable through it.
        strategy = factory.instance('sensitive_pruning_strategy')
        pruner_1 = strategy.pruner
        self.assertEqual(pruner_1.criterions['*'], 'l1_norm')

        self.assertEqual(strategy.start_epoch, 0)
        self.assertEqual(strategy.sensitivities_file,
                         'mobilenet_acc_top1_sensitive.data')
if __name__ == '__main__':
unittest.main()
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import paddle
import unittest
import paddle.fluid as fluid
import numpy as np
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestFilterPruning(unittest.TestCase):
    """Filter-pruning tests driven by the YAML files in ./filter_pruning."""

    def test_compression(self):
        """Compress MobileNet on MNIST and check the last top-1 accuracy.

        The last recorded 'acc_top1' must be within 2% (relative) of
        0.969. The test returns immediately when Paddle is not compiled
        with CUDA.
        """
        if not fluid.core.is_compiled_with_cuda():
            return

        num_classes = 10
        image = fluid.layers.data(
            name='image', shape=[1, 28, 28], dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')

        prediction = MobileNet().net(input=image, class_dim=num_classes)
        acc_top1 = fluid.layers.accuracy(input=prediction, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=prediction, label=label, k=5)

        # The evaluation program is cloned before the loss is added.
        eval_program = fluid.default_main_program().clone(for_test=False)

        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=cost)

        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))

        place = fluid.CUDAPlace(0)
        fluid.Executor(place).run(fluid.default_startup_program())

        eval_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=128)
        eval_feeds = [('img', image.name), ('label', label.name)]
        eval_fetches = [('acc_top1', acc_top1.name),
                        ('acc_top5', acc_top5.name)]

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128)
        train_feeds = [('img', image.name), ('label', label.name)]
        train_fetches = [('loss', avg_cost.name)]

        com_pass = Compressor(
            place,
            fluid.global_scope(),
            fluid.default_main_program(),
            train_reader=train_reader,
            train_feed_list=train_feeds,
            train_fetch_list=train_fetches,
            eval_program=eval_program,
            eval_reader=eval_reader,
            eval_feed_list=eval_feeds,
            eval_fetch_list=eval_fetches,
            train_optimizer=optimizer)
        com_pass.config('./filter_pruning/compress.yaml')
        com_pass.run()

        final_acc = com_pass.context.eval_results['acc_top1'][-1]
        relative_error = abs((final_acc - 0.969) / 0.969)
        self.assertLess(relative_error, 0.02)

    def test_uniform_restore_from_checkpoint(self):
        """Compare a two-stage run against a single uninterrupted run.

        Runs uniform_restore_0.yaml and then uniform_restore_1.yaml
        (presumably the second resumes from the first's checkpoint --
        confirm against the configs), and compares the accuracy with a
        run of uniform_restore.yaml. The two must agree within 0.1%
        (relative). numpy's seed is reset to 0 before each scenario.
        """
        np.random.seed(0)
        self.uniform_restore_from_checkpoint(
            "./filter_pruning/uniform_restore_0.yaml")
        restored_acc = self.uniform_restore_from_checkpoint(
            "./filter_pruning/uniform_restore_1.yaml")

        np.random.seed(0)
        reference_acc = self.uniform_restore_from_checkpoint(
            "./filter_pruning/uniform_restore.yaml")

        drift = abs((restored_acc - reference_acc) / reference_acc)
        self.assertLess(drift, 0.001)

    def uniform_restore_from_checkpoint(self, config_file):
        """Run the Compressor on a small conv+fc net and return accuracy.

        The network is built in fresh programs with random_seed 10 and
        executed on CPU in its own scope.

        Args:
            config_file(str): Path of the Compressor YAML config.

        Returns:
            The last 'acc_top1' entry of the Compressor's eval results.
        """
        num_classes = 10
        train_program = fluid.Program()
        startup_program = fluid.Program()
        train_program.random_seed = 10
        startup_program.random_seed = 10

        with fluid.program_guard(train_program, startup_program):
            with fluid.unique_name.guard():
                image = fluid.layers.data(
                    name='image', shape=[1, 28, 28], dtype='float32')
                image.stop_gradient = False
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')
                out = fluid.layers.conv2d(image, 4, 1)
                out = fluid.layers.fc(out, size=num_classes)
                out = fluid.layers.softmax(out)
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
                cost = fluid.layers.cross_entropy(input=out, label=label)
                avg_cost = fluid.layers.mean(x=cost)
                # Here the clone is taken after the loss has been added.
                eval_program = train_program.clone(for_test=False)

        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))

        place = fluid.CPUPlace()
        scope = fluid.Scope()
        fluid.Executor(place).run(startup_program, scope=scope)

        eval_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=128)
        eval_feeds = [('img', image.name), ('label', label.name)]
        eval_fetches = [('acc_top1', acc_top1.name),
                        ('acc_top5', acc_top5.name)]

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128)
        train_feeds = [('img', image.name), ('label', label.name)]
        train_fetches = [('loss', avg_cost.name)]

        com_pass = Compressor(
            place,
            scope,
            train_program,
            train_reader=train_reader,
            train_feed_list=train_feeds,
            train_fetch_list=train_fetches,
            eval_program=eval_program,
            eval_reader=eval_reader,
            eval_feed_list=eval_feeds,
            eval_fetch_list=eval_fetches,
            train_optimizer=optimizer)
        com_pass.config(config_file)
        com_pass.run()
        return com_pass.context.eval_results['acc_top1'][-1]
if __name__ == '__main__':
unittest.main()
# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
from __future__ import print_function
import unittest
import paddle.fluid as fluid
import six
import numpy as np
from paddle.fluid.contrib.slim.graph import GraphWrapper
from paddle.fluid import core
import os
os.environ['CPU_NUM'] = str(4)
def residual_block(num):
    """Build ``num`` residual stages followed by an fc layer and a loss.

    Each stage adds a 3x3 conv+bn branch (with bias) and a 1x1 conv+bn
    branch (without bias), both fed from the same input, and applies
    relu to the sum.

    Args:
        num(int): Number of residual stages.

    Returns:
        tuple: ``(data, label, loss)`` where ``loss`` is the mean
        cross-entropy between the fc output and ``label``.
    """

    def conv_bn_layer(input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        # conv2d without activation, then batch_norm carrying `act`.
        conv_out = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            use_cudnn=False,
            act=None,
            bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=conv_out, act=act)

    data = fluid.layers.data(name='image', shape=[1, 8, 8], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # NOTE(review): looks like a misspelling of `stop_gradient`; as
    # written it presumably only sets an unused Python attribute. Kept
    # unchanged because the expected counts asserted by the tests were
    # presumably recorded with this behavior -- confirm before fixing.
    data.stop_gradinet = False

    features = data
    for _stage in six.moves.xrange(num):
        main_branch = conv_bn_layer(
            features, 16, 3, 1, 1, act=None, bias_attr=True)
        side_branch = conv_bn_layer(features, 16, 1, 1, 0, act=None)
        features = fluid.layers.elementwise_add(
            x=main_branch, y=side_branch, act='relu')

    logits = fluid.layers.fc(input=features, size=10)
    per_sample_loss = fluid.layers.cross_entropy(input=logits, label=label)
    return data, label, fluid.layers.mean(per_sample_loss)
class TestGraphWrapper(unittest.TestCase):
    """Tests for ``GraphWrapper`` built on a two-block residual network."""

    @staticmethod
    def _get_place():
        """Return ``CUDAPlace(0)`` when Paddle has CUDA, else ``CPUPlace()``."""
        if fluid.core.is_compiled_with_cuda():
            return fluid.CUDAPlace(0)
        return fluid.CPUPlace()

    def build_program(self):
        """Build the train and eval programs and wrap both in GraphWrapper.

        Sets ``self.loss``, ``self.scope``, ``self.eval_graph`` and
        ``self.train_graph`` for use by the individual tests.
        """
        place = self._get_place()
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            image, label, self.loss = residual_block(2)
            # The eval program is cloned before minimize() is called, i.e.
            # before the optimizer modifies `main`.
            eval_program = main.clone()
            opt = fluid.optimizer.SGD(learning_rate=0.001)
            opt.minimize(self.loss)
        self.scope = core.Scope()
        exe = fluid.Executor(place)
        exe.run(startup, scope=self.scope)
        self.eval_graph = GraphWrapper(
            program=eval_program,
            in_nodes={'image': image.name,
                      'label': label.name},
            out_nodes={'loss': self.loss.name})
        self.train_graph = GraphWrapper(
            program=main,
            in_nodes={'image': image.name,
                      'label': label.name},
            out_nodes={'loss': self.loss.name})

    def test_all_parameters(self):
        """The train graph is expected to expose 24 parameters."""
        self.build_program()
        self.assertEqual(len(self.train_graph.all_parameters()), 24)

    def test_all_vars(self):
        """The train graph is expected to hold 91 variables."""
        self.build_program()
        # self.assertEqual(len(self.train_graph.vars()), 90)
        # activation inplace has been disabled in python side
        # which may produce more variable in program_desc
        # update 90 => 94
        # delete three useless RAW variables in Conv2D
        # update 94 => 91
        self.assertEqual(len(self.train_graph.vars()), 91)

    def test_numel_params(self):
        """The parameters of the train graph hold 13258 elements in total."""
        self.build_program()
        self.assertEqual(self.train_graph.numel_params(), 13258)

    def test_compile(self):
        """The compiled train graph can be executed on a random batch."""
        self.build_program()
        exe = fluid.Executor(self._get_place())
        self.train_graph.compile()
        exe.run(self.train_graph.compiled_graph,
                scope=self.scope,
                feed={
                    'image':
                    np.random.randint(0, 40, [16, 1, 8, 8]).astype('float32'),
                    'label': np.random.randint(0, 10, [16, 1]).astype('int64')
                })

    def test_pre_and_next_ops(self):
        """Every op must be listed among the pre-ops of each of its next-ops."""
        self.build_program()
        for op in self.train_graph.ops():
            for next_op in self.train_graph.next_ops(op):
                self.assertIn(op, self.train_graph.pre_ops(next_op))

    def test_get_optimize_graph(self):
        """The optimize graph derived from the eval graph matches training.

        It must contain as many ops as ``self.train_graph`` and be runnable.
        """
        self.build_program()
        place = self._get_place()
        opt = fluid.optimizer.SGD(learning_rate=0.001)
        train_graph = self.eval_graph.get_optimize_graph(
            opt, place, self.scope, no_grad_var_names=['image'])
        self.assertEqual(len(self.train_graph.ops()), len(train_graph.ops()))
        exe = fluid.Executor(place)
        train_graph.compile()
        image = np.random.randint(0, 225, [16, 1, 8, 8]).astype('float32')
        label = np.random.randint(0, 10, [16, 1]).astype('int64')
        exe.run(train_graph.compiled_graph,
                scope=self.scope,
                feed={'image': image,
                      'label': label})

    def test_get_optimize_graph_without_loss(self):
        """Without output nodes, ``get_optimize_graph`` must return None."""
        self.build_program()
        self.eval_graph.out_nodes = {}
        place = self._get_place()
        opt = fluid.optimizer.SGD(learning_rate=0.001)
        train_graph = self.eval_graph.get_optimize_graph(
            opt, place, self.scope, no_grad_var_names=['image'])
        self.assertIsNone(train_graph)

    def test_flops(self):
        """The train graph is expected to report 354624 FLOPs."""
        self.build_program()
        self.assertEqual(self.train_graph.flops(), 354624)

    def test_merge(self):
        """Merging the eval graph into the train graph yields 72 ops."""
        self.build_program()
        self.train_graph.merge(self.eval_graph)
        self.assertEqual(len(self.train_graph.ops()), 72)
# Run the tests defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test LightNAS.
"""
import sys
import unittest
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.core import Compressor
# Add ./light_nas to the module search path so that modules stored in that
# directory can be imported by name.
sys.path.append("./light_nas")
from light_nas_space import LightNASSpace
class TestLightNAS(unittest.TestCase):
    """
    Test LightNAS.
    """

    def _set_target_latency(self, target_latency):
        """Rewrite the ``target_latency`` entry of ./light_nas/compress.yaml.

        Every line containing ``target_latency`` is replaced by a line setting
        it to ``target_latency``; all other lines are written back unchanged.

        Args:
            target_latency: Value written after ``target_latency:``.
        """
        config_path = './light_nas/compress.yaml'
        # NOTE(review): the leading whitespace of the replacement line has to
        # match the indentation of this key inside compress.yaml -- confirm
        # against the config file.
        with open(config_path) as fid:
            lines = [
                ' target_latency: {}\n'.format(target_latency)
                if 'target_latency' in line else line for line in fid
            ]
        with open(config_path, 'w') as fid:
            fid.writelines(lines)

    def _run_compression(self):
        """Build the LightNAS search-space network and run the Compressor."""
        space = LightNASSpace()
        (startup_prog, train_prog, test_prog, train_metrics, test_metrics,
         train_reader, test_reader) = space.create_net()
        # Full unpacking is kept so a change in the metrics layout still
        # fails loudly; only the names used below are needed afterwards.
        train_cost, _train_acc1, _train_acc5, _global_lr = train_metrics
        _test_cost, test_acc1, test_acc5 = test_metrics
        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_prog)
        val_fetch_list = [('acc_top1', test_acc1.name),
                          ('acc_top5', test_acc5.name)]
        train_fetch_list = [('loss', train_cost.name)]
        com_pass = Compressor(
            place,
            fluid.global_scope(),
            train_prog,
            train_reader=train_reader,
            train_feed_list=None,
            train_fetch_list=train_fetch_list,
            eval_program=test_prog,
            eval_reader=test_reader,
            eval_feed_list=None,
            eval_fetch_list=val_fetch_list,
            train_optimizer=None,
            search_space=space)
        com_pass.config('./light_nas/compress.yaml')
        com_pass.run()

    def _check_compression(self, target_latency):
        """Update the config, then run the search when CUDA is available."""
        # Update compress.yaml (done even when the test body is skipped below)
        self._set_target_latency(target_latency)
        # Begin test
        if not fluid.core.is_compiled_with_cuda():
            return
        self._run_compression()

    def test_compression(self):
        """
        Test LightNAS.
        """
        self._check_compression(0)

    def test_compression_with_target_latency(self):
        """
        Test LightNAS with target_latency.
        """
        self._check_compression(1)
# Run the tests defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os
import sys
import argparse
import shutil
import logging
import struct
import six
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.log_helper import get_logger
# Module-level logger obtained from Paddle's get_logger helper, requested
# with the INFO level and a "<time>-<level>: <message>" record format.
_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def parse_args():
    """Parse the options of this test from the command line.

    Options that are not declared here are left for ``unittest``.

    Returns:
        Tuple ``(test_args, argv)``: the namespace carrying the parsed
        options, and an argv list made of the program name followed by every
        argument that was not recognized.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=1, help='batch size')
    parser.add_argument(
        '--infer_model',
        type=str,
        default='',
        help='infer_model is used to load an original fp32 ProgramDesc with fp32 weights'
    )
    parser.add_argument('--infer_data', type=str, default='', help='data file')
    parser.add_argument(
        '--int8_model_save_path',
        type=str,
        default='./output',
        help='int8_model_save_path is used to save an int8 ProgramDesc with fp32 weights'
    )
    parser.add_argument(
        '--warmup_batch_size',
        type=int,
        default=100,
        help='batch size for quantization warmup')
    parser.add_argument(
        '--accuracy_diff_threshold',
        type=float,
        default=0.01,
        help='accepted accuracy drop threshold.')
    # NOTE(review): the ``unittest`` module itself is used as the namespace,
    # so the parsed values become attributes of that module. Kept unchanged
    # for backward compatibility; an ``argparse.Namespace`` would avoid
    # touching the module.
    test_args, args = parser.parse_known_args(namespace=unittest)
    return test_args, sys.argv[:1] + args
class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
    """
    Test API of Post Training quantization strategy for int8 with MKL-DNN.
    """

    def _reader_creator(self, data_file='data.bin', cycle=False):
        """Create a reader over a binary file of images and labels.

        As read by the code, the file starts with an 8-byte integer holding
        the number of samples, followed by all images (3 x 224 x 224 values
        of 4 bytes each, unpacked as floats) and then all labels (8-byte
        integers). Values are unpacked in native byte order.

        Args:
            data_file: Path of the binary data file.
            cycle: When True, the reader restarts from the first sample after
                the last one instead of stopping.

        Returns:
            A generator function yielding ``(image, label)`` pairs.
        """

        def reader():
            with open(data_file, 'rb') as fp:
                num = struct.unpack('q', fp.read(8))[0]
                imgs_offset = 8
                img_ch = 3
                img_w = 224
                img_h = 224
                img_pixel_size = 4
                img_size = img_ch * img_h * img_w * img_pixel_size
                label_size = 8
                labels_offset = imgs_offset + num * img_size
                step = 0
                while step < num:
                    fp.seek(imgs_offset + img_size * step)
                    img = fp.read(img_size)
                    img = struct.unpack_from(
                        '{}f'.format(img_ch * img_w * img_h), img)
                    img = np.array(img)
                    img.shape = (img_ch, img_w, img_h)
                    fp.seek(labels_offset + label_size * step)
                    label = fp.read(label_size)
                    label = struct.unpack('q', label)[0]
                    yield img, int(label)
                    step += 1
                    if cycle and step == num:
                        step = 0

        return reader

    def _update_config_file(self, fp32_model_path, output_path):
        """Instantiate the MKL-DNN int8 config template for this run.

        Copies ./quantization/config_mkldnn_int8.yaml to
        ./quantization/temp.yaml and substitutes the ``MODEL_PATH`` and
        ``OUTPUT_PATH`` placeholders.

        Returns:
            Path of the generated config file.
        """
        config_path = './quantization/config_mkldnn_int8.yaml'
        new_config_path = './quantization/temp.yaml'
        shutil.copy(config_path, new_config_path)
        # The copy is only read here; it is rewritten as a whole below.
        with open(new_config_path, 'r') as fp:
            data = fp.read()
        data = data.replace('MODEL_PATH', fp32_model_path)
        data = data.replace('OUTPUT_PATH', output_path)
        with open(new_config_path, 'w') as fp:
            fp.write(data)
        return new_config_path

    def _transform_depthwise_conv(self, graph):
        '''
        Transform depthwise_conv2d into conv2d, with MKL-DNN only
        '''
        for op_node in graph.all_op_nodes():
            if op_node.name() != 'depthwise_conv2d':
                continue
            input_var_node = graph._find_node_by_name(
                op_node.inputs, op_node.input("Input")[0])
            weight_var_node = graph._find_node_by_name(
                op_node.inputs, op_node.input("Filter")[0])
            output_var_node = graph._find_node_by_name(
                graph.all_var_nodes(), op_node.output("Output")[0])
            # Carry every attribute of the original op over to the new one.
            attrs = {
                attr_name: op_node.op().attr(attr_name)
                for attr_name in op_node.op().attr_names()
            }
            conv_op_node = graph.create_op_node(
                op_type='conv2d',
                attrs=attrs,
                inputs={
                    'Input': input_var_node,
                    'Filter': weight_var_node
                },
                outputs={'Output': output_var_node})
            graph.link_to(input_var_node, conv_op_node)
            graph.link_to(weight_var_node, conv_op_node)
            graph.link_to(conv_op_node, output_var_node)
            graph.safe_remove_nodes(op_node)
        return graph

    def _predict(self, test_reader=None, model_path=None):
        """Run inference with the model at ``model_path`` over ``test_reader``.

        Args:
            test_reader: Callable returning an iterable of batches; each batch
                is a sequence of ``(image, label)`` samples.
            model_path: Directory of the saved inference model.

        Returns:
            Tuple ``(top1, top5)`` of accuracies averaged over all samples.
        """
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        inference_scope = fluid.executor.global_scope()
        with fluid.scope_guard(inference_scope):
            # A model saved with a '__model__' file is loaded with the default
            # file names; otherwise 'model' and 'params' are passed explicitly.
            if os.path.exists(os.path.join(model_path, '__model__')):
                [inference_program, feed_target_names,
                 fetch_targets] = fluid.io.load_inference_model(model_path, exe)
            else:
                [inference_program, feed_target_names,
                 fetch_targets] = fluid.io.load_inference_model(
                     model_path, exe, 'model', 'params')

            use_mkldnn = fluid.core.globals()["FLAGS_use_mkldnn"]
            if use_mkldnn:
                graph = IrGraph(
                    core.Graph(inference_program.desc), for_test=True)
                graph = self._transform_depthwise_conv(graph)
                inference_program = graph.to_program()

            dshape = [3, 224, 224]
            top1 = 0.0
            top5 = 0.0
            total_samples = 0
            for batch_id, data in enumerate(test_reader()):
                # A list is built explicitly so the result is the same on
                # Python 2 and Python 3.
                images = [x[0].reshape(dshape) for x in data]
                images = np.array(images).astype('float32')
                labels = np.array([x[1] for x in data]).astype("int64")
                labels = labels.reshape([-1, 1])
                fluid.core.set_num_threads(int(os.environ['CPU_NUM_THREADS']))
                out = exe.run(inference_program,
                              feed={
                                  feed_target_names[0]: images,
                                  feed_target_names[1]: labels
                              },
                              fetch_list=fetch_targets)
                fluid.core.set_num_threads(1)
                # Weight the fetched batch accuracies by the batch size.
                top1 += np.sum(out[1]) * len(data)
                top5 += np.sum(out[2]) * len(data)
                total_samples += len(data)
                if (batch_id + 1) % 100 == 0:
                    _logger.info('{} images have been predicted'.format(
                        total_samples))
            return top1 / total_samples, top5 / total_samples

    def _warmup(self, reader=None, config_path=''):
        """Run the Compressor configured by ``config_path`` on ``reader``.

        Only the eval reader is supplied; every program, place and optimizer
        argument is passed as None or empty.
        """
        com_pass = Compressor(
            place=None,
            scope=None,
            train_program=None,
            train_reader=None,
            train_feed_list=[],
            train_fetch_list=[],
            eval_program=None,
            eval_reader=reader,
            eval_feed_list=[],
            eval_fetch_list=[],
            teacher_programs=[],
            checkpoint_path='',
            train_optimizer=None,
            distiller_optimizer=None)
        com_pass.config(config_path)
        com_pass.run()

    def _compare_accuracy(self, fp32_acc1, int8_acc1, threshold):
        """Log both top-1 accuracies and check the accepted accuracy drop.

        Fails when either accuracy is not positive or when
        ``fp32_acc1 - int8_acc1`` exceeds ``threshold``.
        """
        _logger.info('--- Accuracy summary ---')
        _logger.info(
            'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)'
            .format(threshold))
        _logger.info('FP32: avg top1 accuracy: {0:.4f}'.format(fp32_acc1))
        _logger.info('INT8: avg top1 accuracy: {0:.4f}'.format(int8_acc1))
        # unittest assertions are used instead of bare asserts so the checks
        # still run when Python is started with -O.
        self.assertGreater(fp32_acc1, 0.0)
        self.assertGreater(int8_acc1, 0.0)
        self.assertLessEqual(fp32_acc1 - int8_acc1, threshold)

    def test_compression(self):
        """Quantize the FP32 model and compare INT8 and FP32 accuracy.

        Does nothing when Paddle is not compiled with MKL-DNN. Options are
        read from the module-level ``test_case_args``.
        """
        if not fluid.core.is_compiled_with_mkldnn():
            return

        int8_model_path = test_case_args.int8_model_save_path
        data_path = test_case_args.infer_data
        fp32_model_path = test_case_args.infer_model
        batch_size = test_case_args.batch_size
        warmup_batch_size = test_case_args.warmup_batch_size
        accuracy_diff_threshold = test_case_args.accuracy_diff_threshold

        _logger.info(
            'FP32 & INT8 prediction run: batch_size {0}, warmup batch size {1}.'
            .format(batch_size, warmup_batch_size))

        # warmup dataset, only use the first batch data
        warmup_reader = paddle.batch(
            self._reader_creator(data_path, False),
            batch_size=warmup_batch_size)
        config_path = self._update_config_file(fp32_model_path, int8_model_path)
        self._warmup(warmup_reader, config_path)

        _logger.info('--- INT8 prediction start ---')
        val_reader = paddle.batch(
            self._reader_creator(data_path, False), batch_size=batch_size)
        int8_model_result = self._predict(val_reader, int8_model_path)

        _logger.info('--- FP32 prediction start ---')
        val_reader = paddle.batch(
            self._reader_creator(data_path, False), batch_size=batch_size)
        fp32_model_result = self._predict(val_reader, fp32_model_path)

        self._compare_accuracy(fp32_model_result[0], int8_model_result[0],
                               accuracy_diff_threshold)
# Script entry point: parse the options of this test first, then hand the
# arguments that were not recognized over to unittest.
if __name__ == '__main__':
    # NOTE: this statement runs at module scope, where `global` has no
    # effect; test_case_args is a module-level name either way.
    global test_case_args
    test_case_args, remaining_args = parse_args()
    unittest.main(argv=remaining_args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import unittest
import paddle.fluid as fluid
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestQuantizationStrategy(unittest.TestCase):
    """
    Exercises the quantization strategy API through the Compressor.
    """

    def test_compression(self):
        """Run the quantization flow once per YAML configuration."""
        for config_file in ("./quantization/compress.yaml",
                            "./quantization/compress_1.yaml"):
            self.quan(config_file)

    def quan(self, config_file):
        """Compress a MobileNet on MNIST using the given config file.

        Returns without doing anything when Paddle is not compiled with CUDA.

        Args:
            config_file: Path of the Compressor YAML configuration.
        """
        if not fluid.core.is_compiled_with_cuda():
            return

        num_classes = 10
        input_shape = [1, 28, 28]

        train_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program), \
                fluid.unique_name.guard():
            image = fluid.layers.data(
                name='image', shape=input_shape, dtype='float32')
            image.stop_gradient = False
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            network = MobileNet(name='quan')
            out = network.net(input=image, class_dim=num_classes)
            print("out: {}".format(out.name))
            acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
            cost = fluid.layers.cross_entropy(input=out, label=label)
            avg_cost = fluid.layers.mean(x=cost)

        val_program = train_program.clone(for_test=False)

        weight_decay = fluid.regularizer.L2Decay(4e-5)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9, learning_rate=0.01, regularization=weight_decay)

        scope = fluid.Scope()
        place = fluid.CUDAPlace(0)
        fluid.Executor(place).run(startup_program, scope=scope)

        # Evaluation inputs and fetches.
        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
        val_feed_list = [('img', image.name), ('label', label.name)]
        val_fetch_list = [('acc_top1', acc_top1.name),
                          ('acc_top5', acc_top5.name)]

        # Training inputs and fetches.
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128)
        train_feed_list = [('img', image.name), ('label', label.name)]
        train_fetch_list = [('loss', avg_cost.name)]

        compressor = Compressor(
            place,
            scope,
            train_program,
            train_reader=train_reader,
            train_feed_list=train_feed_list,
            train_fetch_list=train_fetch_list,
            eval_program=val_program,
            eval_reader=val_reader,
            eval_feed_list=val_feed_list,
            eval_fetch_list=val_fetch_list,
            train_optimizer=optimizer)
        compressor.config(config_file)
        compressor.run()
# Run the tests defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle
import unittest
import paddle.fluid as fluid
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestReader(unittest.TestCase):
    """
    Base case feeding the quantization flow from batched MNIST readers.

    Subclasses may override the reader factories to use another data source.
    """

    def set_train_reader(self, image, label, place):
        """Return the MNIST training reader batched by 128 samples."""
        return paddle.batch(paddle.dataset.mnist.train(), batch_size=128)

    def set_val_reader(self, image, label, place):
        """Return the MNIST test reader batched by 128 samples."""
        return paddle.batch(paddle.dataset.mnist.test(), batch_size=128)

    def set_feed_list(self, image, label):
        """Return the ``(key, variable name)`` pairs for image and label."""
        return [('img', image.name), ('label', label.name)]

    def quan(self, config_file):
        """Compress a MobileNet on MNIST using the given config file.

        Returns without doing anything when Paddle is not compiled with CUDA.

        Args:
            config_file: Path of the Compressor YAML configuration.
        """
        if not fluid.core.is_compiled_with_cuda():
            return

        num_classes = 10
        input_shape = [1, 28, 28]

        train_program = fluid.Program()
        startup_program = fluid.Program()
        # This initial program is replaced by a clone of train_program below.
        val_program = fluid.Program()

        with fluid.program_guard(train_program, startup_program), \
                fluid.unique_name.guard():
            image = fluid.layers.data(
                name='image', shape=input_shape, dtype='float32')
            image.stop_gradient = False
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            network = MobileNet(name='quan')
            out = network.net(input=image, class_dim=num_classes)
            print("out: {}".format(out.name))
            acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
            cost = fluid.layers.cross_entropy(input=out, label=label)
            avg_cost = fluid.layers.mean(x=cost)

        weight_decay = fluid.regularizer.L2Decay(4e-5)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9, learning_rate=0.01, regularization=weight_decay)

        val_program = train_program.clone(for_test=False)

        place = fluid.CUDAPlace(0)
        fluid.Executor(place).run(startup_program)

        # Evaluation inputs and fetches.
        val_reader = self.set_val_reader(image, label, place)
        val_feed_list = self.set_feed_list(image, label)
        val_fetch_list = [('acc_top1', acc_top1.name),
                          ('acc_top5', acc_top5.name)]

        # Training inputs and fetches.
        train_reader = self.set_train_reader(image, label, place)
        train_feed_list = self.set_feed_list(image, label)
        train_fetch_list = [('loss', avg_cost.name)]

        compressor = Compressor(
            place,
            fluid.global_scope(),
            train_program,
            train_reader=train_reader,
            train_feed_list=train_feed_list,
            train_fetch_list=train_fetch_list,
            eval_program=val_program,
            eval_reader=val_reader,
            eval_feed_list=val_feed_list,
            eval_fetch_list=val_fetch_list,
            train_optimizer=optimizer)
        compressor.config(config_file)
        compressor.run()
class TestReader1(TestReader):
    """Variant of TestReader that feeds data through iterable DataLoaders."""

    def set_train_reader(self, image, label, place):
        """Return a DataLoader over the MNIST training set (batch size 128)."""
        train_loader = fluid.io.DataLoader.from_generator(
            feed_list=[image, label], capacity=16, iterable=True)
        sample_generator = paddle.dataset.mnist.train()
        train_loader.set_sample_generator(
            sample_generator, batch_size=128, places=place)
        return train_loader

    def set_val_reader(self, image, label, place):
        """Return a DataLoader over the MNIST test set (batch size 128)."""
        val_loader = fluid.io.DataLoader.from_generator(
            feed_list=[image, label], capacity=16, iterable=True)
        sample_generator = paddle.dataset.mnist.test()
        val_loader.set_sample_generator(
            sample_generator, batch_size=128, places=place)
        return val_loader

    def test_compression(self):
        """Run the quantization flow with the compress_2.yaml configuration."""
        self.quan("./quantization/compress_2.yaml")
# Run the tests defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import unittest
import paddle.fluid as fluid
from mobilenet import MobileNet
from paddle.fluid.contrib.slim.core import Compressor
from paddle.fluid.contrib.slim.graph import GraphWrapper
class TestDistillationStrategy(unittest.TestCase):
    """
    Exercises the distillation strategy API through the Compressor.
    """

    def test_compression(self):
        """Distill a teacher MobileNet into a student MobileNet on MNIST.

        Returns without doing anything when Paddle is not compiled with CUDA.
        """
        if not fluid.core.is_compiled_with_cuda():
            return

        num_classes = 10
        input_shape = [1, 28, 28]

        # Student network, built in the default main program.
        image = fluid.layers.data(
            name='image', shape=input_shape, dtype='float32')
        image.stop_gradient = False
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        student = MobileNet(name="student")
        out = student.net(input=image, class_dim=num_classes)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        # The eval program is cloned before the loss layers are added.
        val_program = fluid.default_main_program().clone(for_test=False)

        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)

        lr_schedule = fluid.layers.piecewise_decay(
            boundaries=[5, 10], values=[0.01, 0.001, 0.0001])
        weight_decay = fluid.regularizer.L2Decay(4e-5)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=lr_schedule,
            regularization=weight_decay)

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        # Evaluation inputs and fetches.
        val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
        val_feed_list = [('img', image.name), ('label', label.name)]
        val_fetch_list = [('acc_top1', acc_top1.name),
                          ('acc_top5', acc_top5.name)]

        # Training inputs and fetches.
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128)
        train_feed_list = [('img', image.name), ('label', label.name)]
        train_fetch_list = [('loss', avg_cost.name)]

        # define teacher program
        teacher_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(teacher_program, startup_program):
            teacher_block = teacher_program.global_block()
            img = teacher_block._clone_variable(image, force_persistable=False)
            MobileNet(name="teacher").net(input=img, class_dim=num_classes)
        exe.run(startup_program)

        compressor = Compressor(
            place,
            fluid.global_scope(),
            fluid.default_main_program(),
            train_reader=train_reader,
            train_feed_list=train_feed_list,
            train_fetch_list=train_fetch_list,
            eval_program=val_program,
            eval_reader=val_reader,
            eval_feed_list=val_feed_list,
            eval_fetch_list=val_fetch_list,
            teacher_programs=[teacher_program.clone(for_test=True)],
            train_optimizer=optimizer,
            distiller_optimizer=optimizer)
        compressor.config('./distillation/compress.yaml')
        compressor.run()
# Run the tests defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
......@@ -166,14 +166,8 @@ packages=['paddle',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.reader',
'paddle.fluid.contrib.slim',
'paddle.fluid.contrib.slim.core',
'paddle.fluid.contrib.slim.graph',
'paddle.fluid.contrib.slim.prune',
'paddle.fluid.contrib.slim.quantization',
'paddle.fluid.contrib.slim.quantization.imperative',
'paddle.fluid.contrib.slim.distillation',
'paddle.fluid.contrib.slim.nas',
'paddle.fluid.contrib.slim.searcher',
'paddle.fluid.contrib.utils',
'paddle.fluid.contrib.extend_optimizer',
'paddle.fluid.contrib.mixed_precision',
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册