From 86b5e96cf97b0992bdca32f0c99e083b5a99dbb7 Mon Sep 17 00:00:00 2001
From: Liufang Sang <slf12thuss@163.com>
Date: Tue, 9 Jun 2020 11:44:44 +0800
Subject: [PATCH] add api to draw hist of act or weight (#246)

---
 paddleslim/common/__init__.py       |   3 +-
 paddleslim/common/analyze_helper.py | 127 ++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 paddleslim/common/analyze_helper.py

diff --git a/paddleslim/common/__init__.py b/paddleslim/common/__init__.py
index 4d1eb1e6..894d5d5a 100644
--- a/paddleslim/common/__init__.py
+++ b/paddleslim/common/__init__.py
@@ -21,9 +21,10 @@ from .cached_reader import cached_reader
 from .server import Server
 from .client import Client
 from .meter import AvgrageMeter
+from .analyze_helper import pdf
 
 __all__ = [
     'EvolutionaryController', 'SAController', 'get_logger', 'ControllerServer',
     'ControllerClient', 'lock', 'unlock', 'cached_reader', 'AvgrageMeter',
-    'Server', 'Client', 'RLBaseController'
+    'Server', 'Client', 'RLBaseController', 'pdf'
 ]
diff --git a/paddleslim/common/analyze_helper.py b/paddleslim/common/analyze_helper.py
new file mode 100644
index 00000000..d5883bb5
--- /dev/null
+++ b/paddleslim/common/analyze_helper.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import matplotlib
+matplotlib.use('Agg')
+import logging
+import numpy as np
+from matplotlib.backends.backend_pdf import PdfPages
+import matplotlib.pyplot as plt
+import os
+
+import paddle
+import paddle.fluid as fluid
+
+from ..common import get_logger
+_logger = get_logger(__name__, level=logging.INFO)
+
+
+def pdf(program,
+        var_names,
+        executor=None,
+        batch_generator=None,
+        data_loader=None,
+        feed_vars=None,
+        fetch_list=None,
+        scope=None,
+        pdf_save_dir='tmp_pdf'):
+    """
+    Draw a histogram of the value distribution of every variable named in var_names.
+
+    Args:
+        program(fluid.Program): program to analyze.
+        var_names(list): names of the variables to analyze. When var_names contains a
+            non-persistable (activation) name, you must set executor, fetch_list and one
+            of batch_generator and data_loader.
+        executor(fluid.Executor, optional): The executor to run program. Default is None.
+        batch_generator(Python Generator, optional): The batch generator provides calibrate
+            data for DataLoader, and it returns a batch every time. Set only one of
+            batch_generator and data_loader; batch_generator wins if both are set. Default is None.
+        data_loader(fluid.io.DataLoader, optional): provides calibrate data to run program. Default is None.
+        feed_vars(list): feed variables for program; required with batch_generator. Default is None.
+        fetch_list(list): fetch list for program. Default is None.
+        scope(fluid.Scope, optional): The scope used to run program and to look variables
+            up. If scope is None, fluid.global_scope() is used.
+        pdf_save_dir(str): directory in which 'result.pdf' is saved; nothing is drawn when
+            it is None. Default is 'tmp_pdf'.
+
+    Returns:
+        dict: maps each variable name found in scope to its value as a numpy array. None
+        is returned when activations are requested but no data source is given.
+    """
+    scope = fluid.global_scope() if scope is None else scope
+    assert isinstance(var_names, list), 'var_names is a list of variable name'
+    real_names = []
+    weight_only = True
+    for var in program.list_vars():
+        if var.name in var_names:
+            if not var.persistable:
+                # Left set on purpose - presumably so the value outlives the run below.
+                weight_only = False
+                var.persistable = True
+            real_names.append(var.name)
+
+    if not weight_only:
+        if batch_generator is None and data_loader is None:
+            _logger.info(
+                "When both batch_generator and data_loader is None, var_names can only include weight names"
+            )
+            return None
+
+        # Validate before first use: executor.place is read just below.
+        assert executor is not None, "when var_names include activations'name, executor must be set"
+        assert fetch_list is not None, "when var_names include activations'name, fetch_list must be set"
+        if batch_generator is not None:
+            assert feed_vars is not None, "When using batch_generator, feed_vars must be set"
+            dataloader = fluid.io.DataLoader.from_generator(
+                feed_list=feed_vars, capacity=512, iterable=True)
+            dataloader.set_batch_generator(batch_generator, executor.place)
+        else:
+            dataloader = data_loader
+
+        for data in dataloader:
+            executor.run(program=program,
+                         feed=data,
+                         fetch_list=fetch_list,
+                         scope=scope,
+                         return_numpy=False)
+            break
+
+    res_np = {}
+    for name in real_names:
+        # Look up in the caller-supplied scope rather than always the global one.
+        var = scope.find_var(name)
+        if var is not None:
+            res_np[name] = np.array(var.get_tensor())
+        else:
+            _logger.info(
+                "can't find var {}. Maybe you should set one of batch_generator and data_loader".
+                format(name))
+    if pdf_save_dir is not None:
+        if not os.path.exists(pdf_save_dir):
+            os.makedirs(pdf_save_dir)
+        pdf_path = os.path.join(pdf_save_dir, 'result.pdf')
+        with PdfPages(pdf_path) as pdf_pages:
+            for idx, (name, arr) in enumerate(res_np.items(), 1):
+                if idx % 10 == 0:
+                    _logger.info("plt {}/{}".format(idx, len(res_np)))
+                arr = arr.flatten()
+                weights = np.ones_like(arr) / len(arr)
+                plt.hist(arr, bins=1000, weights=weights)
+                plt.xlabel(name)
+                plt.ylabel("frequency")
+                plt.title("Hist of variable {}".format(name))
+                pdf_pages.savefig()
+                plt.close()
+    return res_np
-- 
GitLab