# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import types
import paddle
import paddle.fluid as fluid
import numpy as np
import matplotlib
# Select the non-interactive 'Agg' backend before pyplot is imported, so that
# figures are rendered to files only and no GUI/display is needed.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

import logging
from ..common import get_logger
# Module-level logger shared by the functions below, created at INFO level.
_logger = get_logger(__name__, level=logging.INFO)


def get_distribution(program,
                     var_names,
                     executor=None,
                     reader=None,
                     feed_vars=None,
                     scope=None):
    """
    Get the values (distribution) of the variables listed in var_names.

    Variables of ``program`` whose name is in ``var_names`` are looked up in
    ``scope`` and converted to numpy arrays. If every requested variable is
    already persistable (weights only), the values are read directly from the
    scope. Otherwise the non-persistable variables (activations) are
    temporarily marked persistable, the program is run on the first batch
    provided by ``reader``, and the values are read afterwards. The
    temporary ``persistable`` flags are always restored before returning,
    including when an error is raised.

    Args:
        program(fluid.Program): program to analyze.
        var_names(list): names of the variables to analyze. When var_names
            contains an activation name, executor and reader must be set.
        executor(fluid.Executor, optional): The executor to run program.
            Only needed when var_names contains activations. Default is None.
        reader(Python Generator, fluid.io.DataLoader, optional): If you only
            want to get the distribution of weight parameters, you do not
            need to provide a reader. Otherwise, a reader must be provided.
            The reader provides calibration data and returns a batch every
            time. It must be either a python generator or an iterable fluid
            dataloader. When you use a python generator, please ensure that
            its behavior is consistent with `batch_generator`.
            You can get more detail about batch_generator at https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/io_cn/DataLoader_cn.html#id1
        feed_vars(list): feed variables for program. When you use a python
            generator reader to provide data, you should set feed_vars.
            Default is None.
        scope(fluid.Scope, optional): The scope to run program, use it to
            load variables. If scope is None, will use fluid.global_scope().

    Returns:
        dict: maps each variable name found in the scope to a numpy array of
        its value. Names in var_names that are not in the program are
        ignored; names missing from the scope are logged and skipped.

    Raises:
        AssertionError: if var_names is not a list, or if activations are
            requested and reader/executor/feed_vars are not set properly.
    """
    scope = fluid.global_scope() if scope is None else scope
    assert isinstance(var_names, list), 'var_names is a list of variable name'

    requested = set(var_names)
    var_changed = []  # variables whose persistable flag we flipped
    real_names = []  # requested names that really exist in the program
    for var in program.list_vars():
        if var.name not in requested:
            continue
        if not var.persistable:
            # Activations are not kept in the scope after a run unless they
            # are persistable, so mark them temporarily.
            var.persistable = True
            var_changed.append(var)
        real_names.append(var.name)
    # Nothing was flipped <=> every requested variable is a weight.
    weight_only = not var_changed

    def update_var_dist(var_dist):
        """Copy the current value of every requested variable into var_dist."""
        for name in real_names:
            var = scope.find_var(name)
            if var is None:
                _logger.info("can't find var {} in scope.".format(name))
                continue
            var_dist[name] = np.array(var.get_tensor())
        return var_dist

    var_dist = {}
    try:
        if weight_only:
            return update_var_dist(var_dist)

        assert isinstance(reader, types.GeneratorType) or isinstance(
            reader, fluid.reader.DataLoaderBase
        ), "when var_names include activations'name, reader must be either a python generator or a fluid dataloader."
        assert executor is not None, "when var_names include activations'name, executor must be set"

        if isinstance(reader, types.GeneratorType):
            assert feed_vars is not None, "When using batch_generator, feed_vars must be set"
            dataloader = fluid.io.DataLoader.from_generator(
                feed_list=feed_vars, capacity=128, iterable=True)
            dataloader.set_batch_generator(reader, executor.place)
        elif isinstance(reader, fluid.reader.DataLoaderBase):
            dataloader = reader
        else:
            # Only reachable when assertions are disabled (python -O); the
            # assert above rejects any other reader type.
            _logger.info(
                "When both batch_generator and data_loader is None, var_names can only include weight names"
            )
            return

        # Only the first batch is used to sample the activations.
        for data in dataloader:
            executor.run(program=program, feed=data)
            update_var_dist(var_dist)
            break
        return var_dist
    finally:
        # Always undo the temporary persistable marks, even on errors or
        # early returns, so the caller's program is left unchanged.
        for var in var_changed:
            var.persistable = False


def pdf(var_dist, pdf_save_dir='var_dist_pdf'):
    """
    Draw a histogram for each variable in var_dist and save them as a PDF.

    One page is written per variable to ``<pdf_save_dir>/result.pdf``. Each
    histogram uses 1000 bins and is weighted by ``1 / number_of_elements``,
    so the bar heights are relative frequencies.

    Args:
        var_dist(dict): maps variable name to a numpy array of its values.
        pdf_save_dir(str): directory to save the pdf in; it is created if it
            does not exist. If None, nothing is drawn or saved.
            Default is 'var_dist_pdf'.
    """
    if pdf_save_dir is None:
        # Nothing to save; returning here also avoids referencing an
        # undefined pdf_path in the final log message.
        return

    if not os.path.exists(pdf_save_dir):
        # makedirs (unlike mkdir) also creates missing parent directories.
        os.makedirs(pdf_save_dir)
    pdf_path = os.path.join(pdf_save_dir, 'result.pdf')

    numbers = len(var_dist)
    # Named pdf_pages so it does not shadow this function's own name.
    with PdfPages(pdf_path) as pdf_pages:
        for i, (name, arr) in enumerate(var_dist.items()):
            if i % 10 == 0:
                _logger.info("plt {}/{}".format(i, numbers))
            arr = np.asarray(arr).flatten()
            # Normalize the counts so that the bar heights sum to 1.
            weights = np.ones_like(arr) / len(arr)
            plt.hist(arr, bins=1000, weights=weights)
            plt.xlabel(name)
            plt.ylabel("frequency")
            plt.title("Hist of variable {}".format(name))
            # No plt.show(): the module selects the non-interactive Agg
            # backend, so the figure is only written to the PDF.
            pdf_pages.savefig()
            plt.close()
    _logger.info("variables histogram have been saved as {}".format(pdf_path))