diff --git a/demo/prune/train.py b/demo/prune/train.py index f7d7f5cd854848e097c625b37d9c73f79d2aa662..a8d923b3b9cdaee7c3e938cc6ac3729cd7e6f250 100644 --- a/demo/prune/train.py +++ b/demo/prune/train.py @@ -198,6 +198,10 @@ def compress(args): ratios=[0.33] * len(params), place=place) + for param in pruned_program[0].global_block().all_parameters(): + if "weights" in param.name: + print param.name, param.shape + return _logger.info("fops after pruning: {}".format(flops(pruned_program))) for i in range(args.num_epochs): diff --git a/demo/sensitive/run.sh b/demo/sensitive/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..96e4cb7cbfadd59f45746128df977abaa1966012 --- /dev/null +++ b/demo/sensitive/run.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES=2 +python train.py diff --git a/demo/sensitive/train.py b/demo/sensitive/train.py new file mode 100644 index 0000000000000000000000000000000000000000..f043961668dbe1eb4ef8ead91b3f4f460cf80c8a --- /dev/null +++ b/demo/sensitive/train.py @@ -0,0 +1,137 @@ +import os +import sys +import logging +import paddle +import argparse +import functools +import math +import time +import numpy as np +import paddle.fluid as fluid +from paddleslim.prune import merge_sensitive, get_ratios_by_loss +from paddleslim.prune import sensitivity +from paddleslim.common import get_logger +sys.path.append(sys.path[0] + "/../") +import models +from utility import add_arguments, print_arguments + +_logger = get_logger(__name__, level=logging.INFO) + +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('batch_size', int, 64 * 4, "Minibatch size.") +add_arg('use_gpu', bool, True, "Whether to use GPU or not.") +add_arg('model', str, "MobileNet", "The target model.") +add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretained", "Whether to use pretrained model.") +add_arg('data', str, "mnist", "Which data to use. 
def compress(args):
    """
    Measure the pruning sensitivity of the selected model and print the
    pruning ratios derived from it.

    The function builds the evaluation program for `args.model` on the
    dataset named by `args.data`, optionally loads pretrained weights, runs
    `sensitivity` twice over the parameters whose name contains
    "_sep_weights" (each run stores its results in its own file), merges the
    two result files with `merge_sensitive` and prints the ratios returned by
    `get_ratios_by_loss` for an accuracy-loss threshold of 0.01.

    Args:
        args: Parsed command line arguments. The fields read here are
              `data`, `model`, `use_gpu`, `pretrained_model`, `batch_size`
              and `log_period`.

    Raises:
        ValueError: If `args.data` is neither "mnist" nor "imagenet".
    """
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        val_reader = reader.test()
        class_dim = 10
        image_shape = [1, 28, 28]
    elif args.data == "imagenet":
        import imagenet_reader as reader
        val_reader = reader.val()
        class_dim = 1000
        image_shape = [3, 224, 224]
    else:
        raise ValueError("{} is not supported.".format(args.data))
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            # Only load variables that have a file in the pretrained folder.
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(program):
        """Evaluate `program` on the validation reader; return mean top-1 accuracy."""
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(val_reader()):
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".
                    format(batch_id,
                           np.mean(acc_top1_n),
                           np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))

        mean_top1 = np.mean(np.array(acc_top1_ns))
        mean_top5 = np.mean(np.array(acc_top5_ns))
        _logger.info("Final eva - acc_top1: {}; acc_top5: {}".format(
            mean_top1, mean_top5))
        return mean_top1

    params = [
        param.name
        for param in fluid.default_main_program().global_block()
        .all_parameters() if "_sep_weights" in param.name
    ]

    # The ratios are split over two runs writing to separate files; the two
    # result files are merged below.
    sensitivity(
        val_program,
        place,
        params,
        test,
        sensitivities_file="sensitivities_0.data",
        pruned_ratios=[0.1, 0.2, 0.3, 0.4])

    sensitivity(
        val_program,
        place,
        params,
        test,
        sensitivities_file="sensitivities_1.data",
        pruned_ratios=[0.5, 0.6, 0.7])

    sens = merge_sensitive(
        ["./sensitivities_0.data", "./sensitivities_1.data"])

    ratios = get_ratios_by_loss(sens, 0.01)

    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3 (the bare `print ratios` statement is Python 2 only).
    print(ratios)


def main():
    """Parse the command line, echo the arguments and run `compress`."""
    args = parser.parse_args()
    print_arguments(args)
    compress(args)


if __name__ == '__main__':
    main()
**sensitivities(dict):** 合并后的敏感度信息。其格式为: + +``` +{"weight_0": + {0.1: 0.22, + 0.2: 0.33 + }, + "weight_1": + {0.1: 0.21, + 0.2: 0.4 + } +} +``` + +其中,`weight_0`是卷积层参数的名称,sensitivities['weight_0']的`key`为剪裁比例,`value`为精度损失的比例。 + +示例: + + +## load_sensitivities + +>load_sensitivities(sensitivities_file) + +从文件中加载敏感度信息。 + +参数: + +- **sensitivities_file(str):** 存放敏感度信息的本地文件。 + +返回: + +- **sensitivities(dict):** 敏感度信息。 + +示例: diff --git a/paddleslim/prune/__init__.py b/paddleslim/prune/__init__.py index 54e0df1331104faefa8cdaa9f61ccc1cd88d6779..d8c439be403ff93a24406c6caf4d2524fd17023a 100644 --- a/paddleslim/prune/__init__.py +++ b/paddleslim/prune/__init__.py @@ -11,14 +11,24 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .pruner import Pruner -from .auto_pruner import AutoPruner -from .controller_server import ControllerServer -from .controller_client import ControllerClient -from .sensitive_pruner import SensitivePruner -from .sensitive import sensitivity, flops_sensitivity +from .pruner import * +import pruner +from .auto_pruner import * +import auto_pruner +from .controller_server import * +import controller_server +from .controller_client import * +import controller_client +from .sensitive_pruner import * +import sensitive_pruner +from .sensitive import * +import sensitive -__all__ = [ - 'Pruner', 'AutoPruner', 'ControllerServer', 'ControllerClient', - 'SensitivePruner', 'sensitivity', 'flops_sensitivity' -] +__all__ = [] + +__all__ += pruner.__all__ +__all__ += auto_pruner.__all__ +__all__ += controller_server.__all__ +__all__ += controller_client.__all__ +__all__ += sensitive_pruner.__all__ +__all__ += sensitive.__all__ diff --git a/paddleslim/prune/sensitive.py b/paddleslim/prune/sensitive.py index 3341a4a2e847bfb62b00a5caf4807df3b960ea68..5b9d229d9c011f0ff495def1d0a0b6519cf39351 100644 --- a/paddleslim/prune/sensitive.py +++ 
b/paddleslim/prune/sensitive.py @@ -25,7 +25,10 @@ from ..prune import Pruner _logger = get_logger(__name__, level=logging.INFO) -__all__ = ["sensitivity", "flops_sensitivity"] +__all__ = [ + "sensitivity", "flops_sensitivity", "load_sensitivities", + "merge_sensitive", "get_ratios_by_loss" +] def sensitivity(program, @@ -36,23 +39,18 @@ def sensitivity(program, pruned_ratios=None): scope = fluid.global_scope() graph = GraphWrapper(program) - sensitivities = _load_sensitivities(sensitivities_file) + sensitivities = load_sensitivities(sensitivities_file) if pruned_ratios is None: pruned_ratios = np.arange(0.1, 1, step=0.1) for name in param_names: if name not in sensitivities: - size = graph.var(name).shape()[0] - sensitivities[name] = { - 'pruned_percent': [], - 'loss': [], - 'size': size - } + sensitivities[name] = {} baseline = None for name in sensitivities: for ratio in pruned_ratios: - if ratio in sensitivities[name]['pruned_percent']: + if ratio in sensitivities[name]: _logger.debug('{}, {} has computed.'.format(name, ratio)) continue if baseline is None: @@ -75,8 +73,7 @@ def sensitivity(program, _logger.info("pruned param: {}; {}; loss={}".format(name, ratio, loss)) - sensitivities[name]['pruned_percent'].append(ratio) - sensitivities[name]['loss'].append(loss) + sensitivities[name][ratio] = loss _save_sensitivities(sensitivities, sensitivities_file) @@ -98,16 +95,11 @@ def flops_sensitivity(program, scope = fluid.global_scope() graph = GraphWrapper(program) - sensitivities = _load_sensitivities(sensitivities_file) + sensitivities = load_sensitivities(sensitivities_file) for name in param_names: if name not in sensitivities: - size = graph.var(name).shape()[0] - sensitivities[name] = { - 'pruned_percent': [], - 'loss': [], - 'size': size - } + sensitivities[name] = {} base_flops = flops(program) target_pruned_flops = base_flops * pruned_flops_rate @@ -124,15 +116,16 @@ def flops_sensitivity(program, lazy=False, only_graph=True) param_flops = (base_flops - 
def merge_sensitive(sensitivities):
    """
    Merge several sensitivities into one dict.

    Args:
        sensitivities(list<dict> | list<str>): The sensitivities to be merged.
            Each item is either a sensitivities dict of the form
            `{param_name: {pruned_ratio: accuracy_loss}}` or the path of a
            file holding such a dict in pickle format.

    Returns:
        sensitivities(dict): A new dict of the form
            `{param_name: {pruned_ratio: accuracy_loss}}`. When the same
            parameter and ratio occur in several inputs, the value of the
            later input wins. The inputs are not modified.
    """
    assert len(sensitivities) > 0

    records = []
    for item in sensitivities:
        if isinstance(item, dict):
            records.append(item)
            continue
        # Pickle data is binary, so the file must be opened in 'rb' mode;
        # the `with` block closes the handle again.
        # NOTE: pickle can execute arbitrary code while loading. Only pass
        # files that were produced by a trusted run.
        with open(item, 'rb') as f:
            records.append(pickle.load(f))

    new_sensitivities = {}
    for sen in records:
        for param, losses in sen.items():
            new_sensitivities.setdefault(param, {}).update(losses)
    return new_sensitivities
def get_ratios_by_loss(sensitivities, loss):
    """
    Get the max ratio of each parameter. The loss of accuracy must be less
    than or equal to the given `loss` when the single parameter is pruned by
    that ratio.

    For every parameter the measured `(pruned_ratio, accuracy_loss)` points
    are ordered by ratio and scanned from the largest ratio downwards. The
    first point whose loss does not exceed `loss` is selected. If it is the
    largest measured ratio, that ratio is used as is; otherwise the ratio is
    linearly interpolated between the selected point and the next larger one.

    Args:
        sensitivities(dict): The sensitivities used to generate a group of
            pruning ratios. The key of dict is name of parameters to be
            pruned. The value of dict is a dict mapping `pruned_ratio` to
            `accuracy_loss`.
        loss(float): The threshold of accuracy loss.

    Returns:
        ratios(dict): A group of ratios. The key of dict is name of
            parameters while the value is the ratio to be pruned. A parameter
            with no measured point whose loss is within the threshold is left
            out of the result.
    """
    ratios = {}
    for param, losses in sensitivities.items():
        # sorted() works on Python 3 dict views as well as Python 2 lists.
        points = sorted(losses.items())
        last = len(points) - 1
        for i in range(last, -1, -1):
            r0, l0 = points[i]
            if l0 > loss:
                continue
            if i == last:
                ratios[param] = r0
            else:
                # points[i + 1] was skipped above, so l1 > loss >= l0 and
                # the denominator is strictly positive.
                r1, l1 = points[i + 1]
                ratios[param] = r0 + (loss - l0) * (r1 - r0) / (l1 - l0)
            break
    return ratios
- """ - def func(params, x): - a, b, c, d = params - return a * x * x * x + b * x * x + c * x + d + Args: - def error(params, x, y): - return func(params, x) - y + sensitivities(dict): The sensitivities used to generate a group of pruning ratios. The key of dict + is name of parameters to be pruned. The value of dict is a list of tuple with + format `(pruned_ratio, accuracy_loss)`. + pruned_flops(float): The percent of FLOPS to be pruned. + eval_program(Program): The program whose FLOPS is considered. - def slove_coefficient(x, y): - init_coefficient = [10, 10, 10, 10] - coefficient, loss = leastsq(error, init_coefficient, args=(x, y)) - return coefficient + Return: + + ratios(dict): A group of ratios. The key of dict is name of parameters while the value is the ratio to be pruned. + """ min_loss = 0. max_loss = 0. - - # step 1: fit curve by sensitivities - coefficients = {} - for param in sensitivities: - losses = np.array([0] * 5 + sensitivities[param]['loss']) - precents = np.array([0] * 5 + sensitivities[param][ - 'pruned_percent']) - coefficients[param] = slove_coefficient(precents, losses) - loss = np.max(losses) - max_loss = np.max([max_loss, loss]) - # step 2: Find a group of ratios by binary searching. base_flops = flops(eval_program) - ratios = [] + ratios = None max_times = 20 while min_loss < max_loss and max_times > 0: loss = (max_loss + min_loss) / 2 _logger.info( '-----------Try pruned ratios while acc loss={}-----------'. 
format(loss)) - ratios = [] - # step 2.1: Get ratios according to current loss - for param in sensitivities: - coefficient = copy.deepcopy(coefficients[param]) - coefficient[-1] = coefficient[-1] - loss - roots = np.roots(coefficient) - for root in roots: - min_root = 1 - if np.isreal(root) and root > 0 and root < 1: - selected_root = min(root.real, min_root) - ratios.append(selected_root) + ratios = self.get_ratios_by_loss(sensitivities, loss) _logger.info('Pruned ratios={}'.format( - [round(ratio, 3) for ratio in ratios])) - # step 2.2: Pruning by current ratios - param_shape_backup = {} + [round(ratio, 3) for ratio in ratios.values()])) pruned_program = self._pruner.prune( eval_program, None, # scope - sensitivities.keys(), - ratios, + ratios.keys(), + ratios.values(), None, # place only_graph=True) pruned_ratio = 1 - (float(flops(pruned_program)) / base_flops) _logger.info('Pruned flops: {:.4f}'.format(pruned_ratio)) - # step 2.3: Check whether current ratios is enough + # Check whether current ratios is enough if abs(pruned_ratio - pruned_flops) < 0.015: break if pruned_ratio > pruned_flops: @@ -258,4 +235,4 @@ class SensitivePruner(object): else: min_loss = loss max_times -= 1 - return sensitivities.keys(), ratios + return ratios