prune_model.py 6.1 KB
Newer Older
Z
zhouzj 已提交
1 2
import os
import time
C
ceci3 已提交
3
import numpy as np
Z
zhouzj 已提交
4 5
import paddle
import paddle.static as static
C
ceci3 已提交
6 7
from ...prune import Pruner
from ...core import GraphWrapper
8
from ...common.load_model import load_inference_model
Z
zhouzj 已提交
9 10 11
__all__ = ["get_sparse_model", "get_prune_model"]


C
ceci3 已提交
12 13
def get_sparse_model(executor, places, model_file, param_file, ratio,
                     save_path):
Z
zhouzj 已提交
14 15 16 17 18 19 20 21
    """
    Using the unstructured sparse algorithm to compress the network. 
    This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
    Args:
        model_file(str), param_file(str): The inference model to be pruned.
        ratio(float): The ratio to prune the model.
        save_path(str): The save path of pruned model.
    """
22
    assert os.path.exists(model_file), '{} does not exist.'.format(model_file)
Z
zhouzj 已提交
23
    assert os.path.exists(
24 25
        param_file) or param_file is None, '{} does not exist.'.format(
            param_file)
Z
zhouzj 已提交
26 27 28 29 30 31 32 33 34 35 36 37 38
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = model_file.split('/')[-1]
    if param_file is None:
        param_name = None
    else:
        param_name = param_file.split('/')[-1]

    main_prog = static.Program()
    startup_prog = static.Program()
C
ceci3 已提交
39
    executor.run(startup_prog)
Z
zhouzj 已提交
40

C
ceci3 已提交
41 42
    inference_program, feed_target_names, fetch_targets = load_inference_model(
        folder, executor, model_filename=model_name, params_filename=param_name)
Z
zhouzj 已提交
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
    thresholds = {}

    graph = GraphWrapper(inference_program)
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if inp.name() in SKIP: continue
            if 'tmp' in inp.name(): continue
            # 1x1_conv
            cond_conv = len(inp._var.shape) == 4 and inp._var.shape[
                2] == 1 and inp._var.shape[3] == 1
            cond_fc = False

            if cond_fc or cond_conv:
                array = np.array(paddle.static.global_scope().find_var(name)
                                 .get_tensor())
                flatten = np.abs(array.flatten())
                index = min(len(flatten) - 1, int(ratio * len(flatten)))
                ind = np.unravel_index(
                    np.argsort(
                        flatten, axis=None), flatten.shape)
                thresholds[name] = ind[0][:index]

    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if name in SKIP: continue
            if 'tmp' in inp.name(): continue

            cond_conv = (len(inp._var.shape) == 4 and inp._var.shape[2] == 1 and
                         inp._var.shape[3] == 1)
            cond_fc = False

            # only support 1x1_conv now
            if not (cond_conv or cond_fc): continue
            array = np.array(paddle.static.global_scope().find_var(name)
                             .get_tensor())
            if thresholds.get(name) is not None:
                np.put(array, thresholds.get(name), 0)
            assert (abs(1 - np.count_nonzero(array) / array.size - ratio) < 1e-2
                    ), 'The model sparsity is abnormal.'
            paddle.static.global_scope().find_var(name).get_tensor().set(
                array, paddle.CPUPlace())

C
ceci3 已提交
87 88 89
    feed_vars = [
        inference_program.global_block().var(name) for name in feed_target_names
    ]
C
ceci3 已提交
90 91 92
    model_name = '.'.join(model_name.split('.')
                          [:-1]) if model_name is not None else 'model'
    save_path = os.path.join(save_path, model_name)
C
ceci3 已提交
93
    static.save_inference_model(
Z
zhouzj 已提交
94
        save_path,
C
ceci3 已提交
95 96
        feed_vars=feed_vars,
        fetch_vars=fetch_targets,
C
ceci3 已提交
97
        executor=executor,
C
ceci3 已提交
98
        program=inference_program)
Z
zhouzj 已提交
99 100 101
    print("The pruned model is saved in: ", save_path)


C
ceci3 已提交
102
def get_prune_model(executor, places, model_file, param_file, ratio, save_path):
Z
zhouzj 已提交
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
    """
    Using the structured pruning algorithm to compress the network. 
    This interface is only used to evaluate the latency of the compressed network, and does not consider the loss of accuracy.
    Args:
        model_file(str), param_file(str): The inference model to be pruned.
        ratio(float): The ratio to prune the model.
        save_path(str): The save path of pruned model.
    """

    assert os.path.exists(model_file), f'{model_file} does not exist.'
    assert os.path.exists(
        param_file) or param_file is None, f'{param_file} does not exist.'
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = model_file.split('/')[-1]
    if param_file is None:
        param_name = None
    else:
        param_name = param_file.split('/')[-1]

    main_prog = static.Program()
    startup_prog = static.Program()
    scope = static.global_scope()
C
ceci3 已提交
129
    executor.run(startup_prog)
Z
zhouzj 已提交
130

C
ceci3 已提交
131 132
    inference_program, feed_target_names, fetch_targets = load_inference_model(
        folder, executor, model_filename=model_name, params_filename=param_name)
Z
zhouzj 已提交
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155

    prune_params = []
    graph = GraphWrapper(inference_program)
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if inp.name() in SKIP: continue
            if 'tmp' in inp.name(): continue
            cond_conv = len(inp._var.shape) == 4 and 'conv' in name
            # only prune conv
            if cond_conv:
                prune_params.append(name)

    # drop last conv
    prune_params.pop()
    ratios = [ratio] * len(prune_params)

    pruner = Pruner()
    main_program, _, _ = pruner.prune(
        inference_program,
        scope,
        params=prune_params,
        ratios=ratios,
C
ceci3 已提交
156
        place=places,
Z
zhouzj 已提交
157 158 159 160 161
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)

C
ceci3 已提交
162 163 164
    feed_vars = [
        main_program.global_block().var(name) for name in feed_target_names
    ]
C
ceci3 已提交
165 166 167
    model_name = '.'.join(model_name.split('.')
                          [:-1]) if model_name is not None else 'model'
    save_path = os.path.join(save_path, model_name)
C
ceci3 已提交
168
    static.save_inference_model(
Z
zhouzj 已提交
169
        save_path,
C
ceci3 已提交
170 171
        feed_vars=feed_vars,
        fetch_vars=fetch_targets,
C
ceci3 已提交
172
        executor=executor,
C
ceci3 已提交
173
        program=main_program)