import os
import time
import numpy as np
import paddle
import paddle.static as static
from ...prune import Pruner
from ...core import GraphWrapper
from .load_model import load_inference_model
__all__ = ["get_sparse_model", "get_prune_model"]


def get_sparse_model(executor, places, model_file, param_file, ratio,
                     save_path):
    """
    Using the unstructured sparse algorithm to compress the network.
    This interface is only used to evaluate the latency of the compressed network
    and does not consider the loss of accuracy.
    Args:
        executor(paddle.static.Executor): The executor used to load and save the model.
        places(paddle.CPUPlace|paddle.CUDAPlace): The device place the executor runs on.
        model_file(str): Path of the inference model file to be compressed.
        param_file(str|None): Path of the parameter file; None if the model has no separate parameter file.
        ratio(float): The ratio of weights to be set to zero.
        save_path(str): The save path of the compressed model.
    """
    assert os.path.exists(model_file), f'{model_file} does not exist.'
    assert param_file is None or os.path.exists(
        param_file), f'{param_file} does not exist.'
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = model_file.split('/')[-1]
    if param_file is None:
        param_name = None
    else:
        param_name = param_file.split('/')[-1]

    main_prog = static.Program()
    startup_prog = static.Program()
    executor.run(startup_prog)

    inference_program, feed_target_names, fetch_targets = load_inference_model(
        folder, executor, model_filename=model_name, params_filename=param_name)
    thresholds = {}

    graph = GraphWrapper(inference_program)
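    # First pass: for every 1x1 conv weight, rank its elements by absolute
    # magnitude and record the indices of the smallest `ratio` fraction;
    # these positions are zeroed in the second pass.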
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if inp.name() in SKIP: continue
            if 'tmp' in inp.name(): continue
            # 1x1_conv
            cond_conv = len(inp._var.shape) == 4 and inp._var.shape[
                2] == 1 and inp._var.shape[3] == 1
            cond_fc = False

            if cond_fc or cond_conv:
                array = np.array(paddle.static.global_scope().find_var(name)
                                 .get_tensor())
                flatten = np.abs(array.flatten())
                index = min(len(flatten) - 1, int(ratio * len(flatten)))
                ind = np.unravel_index(
                    np.argsort(
                        flatten, axis=None), flatten.shape)
                thresholds[name] = ind[0][:index]

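    # Second pass: zero the recorded positions of each selected weight,
    # verify the resulting sparsity is close to `ratio`, and write the
    # tensor back into the global scope.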
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if name in SKIP: continue
            if 'tmp' in inp.name(): continue

            cond_conv = (len(inp._var.shape) == 4 and inp._var.shape[2] == 1 and
                         inp._var.shape[3] == 1)
            cond_fc = False

            # only support 1x1_conv now
            if not (cond_conv or cond_fc): continue
            array = np.array(paddle.static.global_scope().find_var(name)
                             .get_tensor())
            if thresholds.get(name) is not None:
                np.put(array, thresholds.get(name), 0)
            assert (abs(1 - np.count_nonzero(array) / array.size - ratio) < 1e-2
                    ), 'The model sparsity is abnormal.'
            paddle.static.global_scope().find_var(name).get_tensor().set(
                array, paddle.CPUPlace())

    feed_vars = [
        inference_program.global_block().var(name) for name in feed_target_names
    ]
    static.save_inference_model(
        save_path,
        feed_vars=feed_vars,
        fetch_vars=fetch_targets,
        executor=executor,
        program=inference_program,
        model_filename=model_name,
        params_filename=param_name)
    print("The sparse model is saved in: ", save_path)
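
# A minimal usage sketch (assumption: CPU execution and illustrative paths;
# point the files at a real inference model before running):
#
#   import paddle
#   import paddle.static as static
#
#   paddle.enable_static()
#   place = paddle.CPUPlace()
#   exe = static.Executor(place)
#   get_sparse_model(
#       executor=exe,
#       places=place,
#       model_file='./inference_model/model.pdmodel',
#       param_file='./inference_model/model.pdiparams',
#       ratio=0.75,
#       save_path='./sparse_model/model')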


def get_prune_model(executor, places, model_file, param_file, ratio, save_path):
    """
    Using the structured pruning algorithm to compress the network.
    This interface is only used to evaluate the latency of the compressed network
    and does not consider the loss of accuracy.
    Args:
        executor(paddle.static.Executor): The executor used to load and save the model.
        places(paddle.CPUPlace|paddle.CUDAPlace): The device place passed to the pruner.
        model_file(str): Path of the inference model file to be pruned.
        param_file(str|None): Path of the parameter file; None if the model has no separate parameter file.
        ratio(float): The pruning ratio applied to each selected conv weight.
        save_path(str): The save path of the pruned model.
    """

    assert os.path.exists(model_file), f'{model_file} does not exist.'
    assert param_file is None or os.path.exists(
        param_file), f'{param_file} does not exist.'
    paddle.enable_static()

    SKIP = ['image', 'feed', 'pool2d_0.tmp_0']

    folder = os.path.dirname(model_file)
    model_name = model_file.split('/')[-1]
    if param_file is None:
        param_name = None
    else:
        param_name = param_file.split('/')[-1]

    main_prog = static.Program()
    startup_prog = static.Program()
    scope = static.global_scope()
    executor.run(startup_prog)

    inference_program, feed_target_names, fetch_targets = load_inference_model(
        folder, executor, model_filename=model_name, params_filename=param_name)

    prune_params = []
    graph = GraphWrapper(inference_program)
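    # Collect the names of all 4-D conv weights; these are the candidates
    # for structured pruning.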
    for op in graph.ops():
        for inp in op.all_inputs():
            name = inp.name()
            if inp.name() in SKIP: continue
            if 'tmp' in inp.name(): continue
            cond_conv = len(inp._var.shape) == 4 and 'conv' in name
            # only prune conv
            if cond_conv:
                prune_params.append(name)

    # drop last conv
    prune_params.pop()
    ratios = [ratio] * len(prune_params)

    pruner = Pruner()
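    # Apply structured pruning to the collected conv weights, using the same
    # ratio for every parameter; the returned program holds the pruned shapes.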
    main_program, _, _ = pruner.prune(
        inference_program,
        scope,
        params=prune_params,
        ratios=ratios,
        place=places,
        lazy=False,
        only_graph=False,
        param_backup=None,
        param_shape_backup=None)

    feed_vars = [
        main_program.global_block().var(name) for name in feed_target_names
    ]
    static.save_inference_model(
        save_path,
        feed_vars=feed_vars,
        fetch_vars=fetch_targets,
        executor=executor,
        program=main_program,
        model_filename=model_name,
        params_filename=param_name)
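
# get_prune_model is called the same way as get_sparse_model above
# (illustrative paths; adapt them to a real inference model), e.g.:
#
#   get_prune_model(
#       executor=exe,
#       places=place,
#       model_file='./inference_model/model.pdmodel',
#       param_file='./inference_model/model.pdiparams',
#       ratio=0.3,
#       save_path='./pruned_model/model')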