analyze_helper.py 5.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import matplotlib
# Select the non-interactive 'Agg' backend. This call is deliberately placed
# before the pyplot import below so the backend is chosen before pyplot loads;
# do not reorder these lines.
matplotlib.use('Agg')
import logging
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import os

import paddle
import paddle.fluid as fluid

from ..common import get_logger
# Module-level logger at INFO level, created with the package's get_logger helper.
_logger = get_logger(__name__, level=logging.INFO)


def pdf(program,
        var_names,
        executor=None,
        batch_generator=None,
        data_loader=None,
        feed_vars=None,
        fetch_list=None,
        scope=None,
        pdf_save_dir='tmp_pdf'):
    """
    Draw histograms of the value distribution of the variables named in var_names.

    Variables of ``program`` whose name is in ``var_names`` are looked up in
    ``scope``. If any of them is not persistable (i.e. an activation), it is
    marked persistable and the program is run on one batch so that its value
    is available in ``scope``. One histogram per found variable is written to
    ``<pdf_save_dir>/result.pdf``.

    Args:
        program(fluid.Program): program to analyze. Note that non-persistable
            variables selected by var_names are set to persistable in place.
        var_names(list): name of variables to analyze. When there is an activation name in var_names,
            you should set executor, fetch_list and one of batch_generator and data_loader.
        executor(fluid.Executor, optional): The executor to run program. Default is None.
        batch_generator(Python Generator, optional): The batch generator provides calibrate data for DataLoader,
            and it returns a batch every time. For data_loader and batch_generator,
            only one can be set. If both are given, batch_generator is used. Default is None.
        data_loader(fluid.io.DataLoader, optional): The data_loader provides calibrate data to run program.
            Default is None.
        feed_vars(list): feed variables for program. When you use batch_generator to provide data,
            you should set feed_vars. Default is None.
        fetch_list(list): fetch list for program. Default is None.
        scope(fluid.Scope, optional): The scope to run program in and to load variables from.
            If scope is None, will use fluid.global_scope().
        pdf_save_dir(str): dirname to save pdf. If None, no pdf is written. Default is 'tmp_pdf'

    Returns:
        dict: maps each variable name that was found in scope to its value as a numpy array.
        None is returned (after logging) when var_names contains activations but
        neither batch_generator nor data_loader is given.

    Raises:
        AssertionError: if var_names is not a list, or if activations are requested
            and executor / fetch_list (or feed_vars with batch_generator) is missing.
    """
    scope = fluid.global_scope() if scope is None else scope
    assert isinstance(var_names, list), 'var_names is a list of variable name'

    wanted = set(var_names)
    real_names = []
    weight_only = True
    for var in program.list_vars():
        if var.name not in wanted:
            continue
        if not var.persistable:
            # Activations are only kept in the scope after a run when they
            # are persistable, so flip the flag before executing the program.
            weight_only = False
            var.persistable = True
        real_names.append(var.name)

    if not weight_only:
        if batch_generator is None and data_loader is None:
            _logger.info(
                "When both batch_generator and data_loader is None, var_names can only include weight names"
            )
            return

        # Validate before `executor.place` is dereferenced below, so a missing
        # executor produces this message instead of an AttributeError.
        assert executor is not None, "when var_names include activations'name, executor must be set"
        assert fetch_list is not None, "when var_names include activations'name, fetch_list must be set"

        if batch_generator is not None:
            assert feed_vars is not None, "When using batch_generator, feed_vars must be set"
            dataloader = fluid.io.DataLoader.from_generator(
                feed_list=feed_vars, capacity=512, iterable=True)
            dataloader.set_batch_generator(batch_generator, executor.place)
        else:
            dataloader = data_loader

        # A single batch is enough to populate the activations in the scope.
        for data in dataloader:
            executor.run(program=program,
                         feed=data,
                         fetch_list=fetch_list,
                         scope=scope,
                         return_numpy=False)
            break

    res_np = {}
    for name in real_names:
        # Read from the caller-selected scope, i.e. the one the program ran in.
        var = scope.find_var(name)
        if var is not None:
            res_np[name] = np.array(var.get_tensor())
        else:
            _logger.info(
                "can't find var {}. Maybe you should set one of batch_generator and data_loader".
                format(name))

    if pdf_save_dir is not None:
        if not os.path.exists(pdf_save_dir):
            # makedirs also creates missing parent directories.
            os.makedirs(pdf_save_dir)
        pdf_path = os.path.join(pdf_save_dir, 'result.pdf')
        total = len(res_np)
        # Named pdf_pages so the handle does not shadow this function's name.
        with PdfPages(pdf_path) as pdf_pages:
            for idx, (name, arr) in enumerate(res_np.items(), 1):
                if idx % 10 == 0:
                    _logger.info("plt {}/{}".format(idx, total))
                arr = arr.flatten()
                # Equal per-sample weights turn bin counts into relative frequencies.
                weights = np.ones_like(arr) / len(arr)
                plt.hist(arr, bins=1000, weights=weights)
                plt.xlabel(name)
                plt.ylabel("frequency")
                plt.title("Hist of variable {}".format(name))
                # No plt.show(): the figure is only written to the pdf, and
                # showing it first could leave savefig with an empty figure.
                pdf_pages.savefig()
                plt.close()
    return res_np