'''
A collection of graph transforms.

A transformer is a callable that accepts a graph and returns a transformed
version.
'''

import os

import numpy as np

from .caffe import get_caffe_resolver, has_pycaffe
from .errors import KaffeError, print_stderr
from .layers import NodeKind


class DataInjector(object):
    '''
    Associates parameters loaded from a .caffemodel file with their
    corresponding nodes.
    '''

    def __init__(self, def_path, data_path):
        # The .prototxt file defining the graph
        self.def_path = def_path
        # The .caffemodel file containing the learned parameters
        self.data_path = data_path
        # Set to true if the fallback protocol-buffer based backend was used
        self.did_use_pb = False
        # A list containing (layer name, parameters) tuples
        self.params = None
        # Load the parameters
        self.load()

    def load(self):
        if has_pycaffe():
            self.load_using_caffe()
        else:
            self.load_using_pb()

    def load_using_caffe(self):
        caffe = get_caffe_resolver().caffe
        net = caffe.Net(self.def_path, self.data_path, caffe.TEST)
        # Materialize each layer's blobs as a list so they can be indexed
        # later (a lazy map() iterator would not support indexing).
        self.params = [(k, [blob.data for blob in v])
                       for k, v in net.params.items()]

    def load_using_pb(self):
        data = get_caffe_resolver().NetParameter()
        with open(self.data_path, 'rb') as f:
            data.MergeFromString(f.read())
        pair = lambda layer: (layer.name, self.normalize_pb_data(layer))
        layers = data.layers or data.layer
        self.params = [pair(layer) for layer in layers if layer.blobs]
        self.did_use_pb = True

    def normalize_pb_data(self, layer):
        transformed = []
        for blob in layer.blobs:
            if len(blob.shape.dim):
                dims = blob.shape.dim
                # Pad the shape with leading singletons to reach four
                # dimensions: (c_o, c_i, h, w).
                c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims))
            else:
                c_o = blob.num
                c_i = blob.channels
                h = blob.height
                w = blob.width
            data = np.array(
                blob.data, dtype=np.float32).reshape(c_o, c_i, h, w)
            transformed.append(data)
        return transformed

    def adjust_parameters(self, node, data):
        if not self.did_use_pb:
            return data
        # When using the protobuf-backend, each parameter initially has four
        # dimensions. In certain cases (like FC layers), we want to eliminate
        # the singleton dimensions. This implementation takes care of the
        # common cases; however, it does leave the potential for future
        # issues. The Caffe-backend does not suffer from this problem.
        data = list(data)
        squeeze_indices = [1]  # Squeeze biases.
        if node.kind == NodeKind.InnerProduct:
            squeeze_indices.append(0)  # Squeeze FC weights.
        for idx in squeeze_indices:
            data[idx] = np.squeeze(data[idx])
        return data

    def __call__(self, graph):
        for layer_name, data in self.params:
            if layer_name in graph:
                node = graph.get_node(layer_name)
                node.data = self.adjust_parameters(node, data)
            else:
                print_stderr('Ignoring parameters for non-existent layer: %s' %
                             layer_name)
        return graph
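

# Illustration only, with assumed shapes: a minimal sketch of the
# singleton-dimension squeeze that DataInjector.adjust_parameters applies to
# InnerProduct parameters loaded via the protobuf backend.
def _example_pb_squeeze():
    '''Not called by the converter; shapes here are hypothetical.'''
    # A 2-D FC weight blob (c_o=10, c_i=512) is padded by normalize_pb_data
    # to (1, 1, 10, 512); its bias blob (10,) is padded to (1, 1, 1, 10).
    weights = np.zeros((1, 1, 10, 512), dtype=np.float32)
    biases = np.zeros((1, 1, 1, 10), dtype=np.float32)
    squeezed = [np.squeeze(weights), np.squeeze(biases)]
    return [t.shape for t in squeezed]  # [(10, 512), (10,)]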


class DataReshaper(object):
    def __init__(self, mapping, replace=True):
        # A dictionary mapping NodeKind to the transposed order.
        self.mapping = mapping
        # The node kinds eligible for reshaping.
        self.reshaped_node_types = self.mapping.keys()
        # If true, the reshaped data will replace the old one.
        # Otherwise, it's set to the reshaped_data attribute.
        self.replace = replace

    def has_spatial_parent(self, node):
        try:
            parent = node.get_only_parent()
            s = parent.output_shape
            return s.height > 1 or s.width > 1
        except KaffeError:
            return False

    def map(self, node_kind):
        try:
            return self.mapping[node_kind]
        except KeyError:
            raise KaffeError(
                'Ordering not found for node kind: {}'.format(node_kind))

    def __call__(self, graph):
        for node in graph.nodes:
            if node.data is None:
                continue
            if node.kind not in self.reshaped_node_types:
                # Check for 2+ dimensional data.
                if any(len(tensor.shape) > 1 for tensor in node.data):
                    print_stderr('Warning: parameters not reshaped for node: '
                                 '{}'.format(node))
                continue
            transpose_order = self.map(node.kind)
            weights = node.data[0]
            if (node.kind == NodeKind.InnerProduct and
                    self.has_spatial_parent(node)):
                # The FC layer connected to the spatial layer needs to be
                # re-wired to match the new spatial ordering.
                in_shape = node.get_only_parent().output_shape
                fc_shape = weights.shape
                output_channels = fc_shape[0]
                if os.environ.get('CONVERTER', 'paddle') == 'paddle':
                    # For Paddle.
                    weights = weights.reshape((output_channels, -1))
                    weights = weights.transpose(transpose_order)
                    node.reshaped_data = weights
                else:
                    # For TensorFlow.
                    weights = weights.reshape(
                        (output_channels, in_shape.channels, in_shape.height,
                         in_shape.width))
                    weights = weights.transpose(self.map(NodeKind.Convolution))
                    node.reshaped_data = weights.reshape(
                        fc_shape[transpose_order[0]],
                        fc_shape[transpose_order[1]])
            else:
                node.reshaped_data = weights.transpose(transpose_order)

        if self.replace:
            for node in graph.nodes:
                if hasattr(node, 'reshaped_data'):
                    # Set the weights.
                    node.data[0] = node.reshaped_data
                    del node.reshaped_data
        return graph
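

# Illustration only, assuming a TensorFlow-style mapping of
# {NodeKind.Convolution: (2, 3, 1, 0), NodeKind.InnerProduct: (1, 0)} and
# hypothetical shapes: a minimal sketch of the FC re-wiring performed in
# DataReshaper.__call__ for the TensorFlow path.
def _example_fc_rewire():
    '''Not called by the converter; shapes here are hypothetical.'''
    # An FC layer with 10 outputs following a spatial parent with
    # channels=2, height=3, width=3 (a flattened input of 18).
    output_channels, in_c, in_h, in_w = 10, 2, 3, 3
    weights = np.arange(
        output_channels * in_c * in_h * in_w,
        dtype=np.float32).reshape(output_channels, -1)
    # Expand the flattened input back to Caffe's (c_o, c_i, h, w) layout,
    # reorder it to TensorFlow's (h, w, c_i, c_o), then flatten it again as
    # (input, output) per the (1, 0) InnerProduct ordering.
    rewired = weights.reshape(output_channels, in_c, in_h, in_w).transpose(
        (2, 3, 1, 0)).reshape(in_c * in_h * in_w, output_channels)
    return rewired.shape  # (18, 10)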


class SubNodeFuser(object):
    '''
    An abstract helper for merging a single-child node with its single parent.
    '''

    def __call__(self, graph):
        nodes = graph.nodes
        fused_nodes = []
        for node in nodes:
            if len(node.parents) != 1:
                # We're only fusing nodes with single parents.
                continue
            parent = node.get_only_parent()
            if len(parent.children) != 1:
                # We can only fuse a node if its parent's
                # value isn't used by any other node.
                continue
            if not self.is_eligible_pair(parent, node):
                continue
            # Rewire the fused node's children to its parent.
            for child in node.children:
                child.parents.remove(node)
                parent.add_child(child)
            # Disconnect the fused node from the graph.
            parent.children.remove(node)
            fused_nodes.append(node)
            # Let the sub-class merge the fused node in any arbitrary way.
            self.merge(parent, node)
        transformed_nodes = [node for node in nodes if node not in fused_nodes]
        return graph.replaced(transformed_nodes)

    def is_eligible_pair(self, parent, child):
        '''Returns true if this parent/child pair is eligible for fusion.'''
        raise NotImplementedError('Must be implemented by subclass.')

    def merge(self, parent, child):
        '''Merge the child node into the parent.'''
        raise NotImplementedError('Must be implemented by subclass.')


class ReLUFuser(SubNodeFuser):
    '''
    Fuses rectified linear units with their parent nodes.
    '''

    def __init__(self, allowed_parent_types=None):
        # Fuse ReLUs when the parent node is one of the given types.
        # If None, all node types are eligible.
        self.allowed_parent_types = allowed_parent_types

    def is_eligible_pair(self, parent, child):
        return ((self.allowed_parent_types is None or
                 parent.kind in self.allowed_parent_types) and
                child.kind == NodeKind.ReLU)

    def merge(self, parent, _):
        parent.metadata['relu'] = True


class BatchNormScaleBiasFuser(SubNodeFuser):
    '''
    The original batch normalization paper includes two learned parameters: a
    scaling factor gamma and a bias beta. Caffe's implementation does not
    include these two, so they are commonly replicated by adding a scale+bias
    layer immediately after the batch norm. This fuser merges the scale+bias
    layer with the batch norm.
    '''

    def is_eligible_pair(self, parent, child):
        return (parent.kind == NodeKind.BatchNorm and
                child.kind == NodeKind.Scale and
                child.parameters.axis == 1 and child.parameters.bias_term)

    def merge(self, parent, child):
        parent.scale_bias_node = child


class BatchNormPreprocessor(object):
    '''
    Prescale batch normalization parameters. Concatenate gamma (scale) and
    beta (bias) terms if set.
    '''

    def __call__(self, graph):
        for node in graph.nodes:
            if node.kind != NodeKind.BatchNorm:
                continue
            assert node.data is not None
            assert len(node.data) == 3
            mean, variance, scale = node.data
            # Prescale the stats.
            scaling_factor = 1.0 / scale if scale != 0 else 0
            mean *= scaling_factor
            variance *= scaling_factor
            # Replace with the updated values.
            node.data = [mean, variance]
            if hasattr(node, 'scale_bias_node'):
                # Include the scale and bias terms.
                gamma, beta = node.scale_bias_node.data
                node.data += [gamma, beta]
        return graph


class NodeRenamer(object):
    '''
    Renames nodes in the graph using a given unary function that accepts a
    node and returns its new name.
    '''

    def __init__(self, renamer):
        self.renamer = renamer

    def __call__(self, graph):
        for node in graph.nodes:
            node.name = self.renamer(node)
        return graph


class ParameterNamer(object):
    '''
    Convert layer data arrays to a dictionary mapping parameter names to their
    values.
    '''

    def __call__(self, graph):
        for node in graph.nodes:
            if node.data is None:
                continue
            if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct):
                names = ('weights', )
                if node.parameters.bias_term:
                    names += ('biases', )
            elif node.kind == NodeKind.BatchNorm:
                names = ('mean', 'variance')
                if len(node.data) == 4:
                    names += ('scale', 'offset')
            else:
                print_stderr('WARNING: Unhandled parameters: {}'.format(
                    node.kind))
                continue
            assert len(names) == len(node.data)
            node.data = dict(zip(names, node.data))
        return graph
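

# A minimal usage sketch, not the converter's actual pipeline: one plausible
# way to chain these transforms, assuming a `graph` produced elsewhere in this
# package and the TensorFlow-style transpose mapping discussed above. The
# real pipeline and its ordering live in the converter front-end.
def _example_transform_pipeline(graph, def_path, data_path):
    '''Illustration only; ordering and mapping are assumptions.'''
    transformers = [
        # Attach .caffemodel parameters to graph nodes.
        DataInjector(def_path, data_path),
        # Fuse Scale layers into their BatchNorm parents.
        BatchNormScaleBiasFuser(),
        # Fold ReLUs into eligible parent layers.
        ReLUFuser(allowed_parent_types=[NodeKind.Convolution,
                                        NodeKind.InnerProduct]),
        # Transpose parameters to the target framework's ordering.
        DataReshaper({
            NodeKind.Convolution: (2, 3, 1, 0),
            NodeKind.InnerProduct: (1, 0),
        }),
        # Prescale batch norm statistics and attach gamma/beta.
        BatchNormPreprocessor(),
        # Convert parameter lists to name->value dictionaries.
        ParameterNamer(),
    ]
    for transform in transformers:
        graph = transform(graph)
    return graph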