import numpy as np
import torch
from torch import nn

from pytorch_widedeep.wdtypes import *  # noqa: F403

allowed_activations = ["relu", "leaky_relu", "gelu"]


def _get_activation_fn(activation):
    if activation == "relu":
        return nn.ReLU(inplace=True)
    elif activation == "leaky_relu":
        return nn.LeakyReLU(inplace=True)
    elif activation == "gelu":
        return nn.GELU()


def dense_layer(
    inp: int,
    out: int,
    activation: str,
    p: float,
    bn: bool,
    linear_first: bool,
):
    # This is basically the LinBnDrop class at the fastai library
    act_fn = _get_activation_fn(activation)
    layers = [nn.BatchNorm1d(out if linear_first else inp)] if bn else []
    if p != 0:
        layers.append(nn.Dropout(p))  # type: ignore[arg-type]
    lin = [nn.Linear(inp, out, bias=not bn), act_fn]
    layers = lin + layers if linear_first else layers + lin
    return nn.Sequential(*layers)


class MLP(nn.Module):
    def __init__(
        self,
        d_hidden: List[int],
        activation: str,
        dropout: Optional[Union[float, List[float]]],
        batchnorm: bool,
        batchnorm_last: bool,
        linear_first: bool,
    ):
        super(MLP, self).__init__()

        if not dropout:
            dropout = [0.0] * len(d_hidden)
        elif isinstance(dropout, float):
            dropout = [dropout] * len(d_hidden)

        self.mlp = nn.Sequential()
        for i in range(1, len(d_hidden)):
            self.mlp.add_module(
                "dense_layer_{}".format(i - 1),
                dense_layer(
                    d_hidden[i - 1],
                    d_hidden[i],
                    activation,
                    dropout[i - 1],
                    # batchnorm is applied to the last dense layer only if
                    # batchnorm_last is True
                    batchnorm and (i != len(d_hidden) - 1 or batchnorm_last),
                    linear_first,
                ),
            )

    def forward(self, X: Tensor) -> Tensor:
        return self.mlp(X)


class TabMlp(nn.Module):
    def __init__(
        self,
        column_idx: Dict[str, int],
        mlp_hidden_dims: List[int] = [200, 100],
        mlp_activation: str = "relu",
        mlp_dropout: Union[float, List[float]] = 0.1,
        mlp_batchnorm: bool = False,
        mlp_batchnorm_last: bool = False,
        mlp_linear_first: bool = False,
        embed_input: Optional[List[Tuple[str, int, int]]] = None,
        embed_dropout: float = 0.1,
        continuous_cols: Optional[List[str]] = None,
        batchnorm_cont: bool = False,
    ):
        r"""Defines a ``TabMlp`` model that can be used as the ``deeptabular``
        component of a Wide & Deep model.

        This class combines embedding representations of the categorical
        features with numerical (aka continuous) features. These are then
        passed through a series of dense layers (i.e. a MLP).

        Parameters
        ----------
        column_idx: Dict
            Dict containing the index of the columns that will be passed
            through the TabMlp model. Required to slice the tensors. e.g.
            {'education': 0, 'relationship': 1, 'workclass': 2, ...}
        mlp_hidden_dims: List, default = [200, 100]
            List with the number of neurons per dense layer in the mlp.
        mlp_activation: str, default = "relu"
            Activation function for the dense layers of the MLP. Currently
            only "relu", "leaky_relu" and "gelu" are supported
        mlp_dropout: float or List, default = 0.1
            float or List of floats with the dropout between the dense
            layers. e.g: [0.5, 0.5]
        mlp_batchnorm: bool, default = False
            Boolean indicating whether or not batch normalization will be
            applied to the dense layers
        mlp_batchnorm_last: bool, default = False
            Boolean indicating whether or not batch normalization will be
            applied to the last of the dense layers
        mlp_linear_first: bool, default = False
            Boolean indicating the order of the operations in the dense
            layer. If ``True: [LIN -> ACT -> BN -> DP]``. If ``False: [BN ->
            DP -> LIN -> ACT]``
        embed_input: List, Optional, default = None
            List of Tuples with the column name, number of unique values and
            embedding dimension. e.g. [(education, 11, 32), ...]
        embed_dropout: float, default = 0.1
            embeddings dropout
        continuous_cols: List, Optional, default = None
            List with the name of the numeric (aka continuous) columns
        batchnorm_cont: bool, default = False
            Boolean indicating whether or not to apply batch normalization to
            the continuous input

        Attributes
        ----------
        mlp: ``nn.Sequential``
            mlp model that will receive the concatenation of the embeddings
            and the continuous columns
        embed_layers: ``nn.ModuleDict``
            ``ModuleDict`` with the embedding layers
        output_dim: int
            The output dimension of the model. This is a required attribute
            necessary to build the WideDeep class

        Example
        --------
        >>> import torch
        >>> from pytorch_widedeep.models import TabMlp
        >>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
        >>> colnames = ['a', 'b', 'c', 'd', 'e']
        >>> embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]
        >>> column_idx = {k:v for v,k in enumerate(colnames)}
        >>> model = TabMlp(mlp_hidden_dims=[8,4], column_idx=column_idx, embed_input=embed_input,
        ... continuous_cols = ['e'])
        >>> out = model(X_tab)
        """
        super(TabMlp, self).__init__()

        self.column_idx = column_idx
        self.mlp_hidden_dims = mlp_hidden_dims
        self.mlp_activation = mlp_activation
        self.mlp_dropout = mlp_dropout
        self.mlp_batchnorm = mlp_batchnorm
        self.mlp_batchnorm_last = mlp_batchnorm_last
        self.mlp_linear_first = mlp_linear_first
        self.embed_input = embed_input
        self.embed_dropout = embed_dropout
        self.continuous_cols = continuous_cols
        self.batchnorm_cont = batchnorm_cont

        if self.mlp_activation not in allowed_activations:
            raise ValueError(
                "Currently, only the following activation functions are supported "
                "for the MLP's dense layers: {}. Got {} instead".format(
                    ", ".join(allowed_activations), self.mlp_activation
                )
            )

        # Embeddings: val + 1 because 0 is reserved for padding/unseen categories.
        if self.embed_input is not None:
            self.embed_layers = nn.ModuleDict(
                {
                    "emb_layer_" + col: nn.Embedding(val + 1, dim, padding_idx=0)
                    for col, val, dim in self.embed_input
                }
            )
            self.embedding_dropout = nn.Dropout(embed_dropout)
            emb_inp_dim = np.sum([embed[2] for embed in self.embed_input])
        else:
            emb_inp_dim = 0  # type: ignore[assignment]

        # Continuous
        if self.continuous_cols is not None:
            cont_inp_dim = len(self.continuous_cols)
            if self.batchnorm_cont:
                self.norm = nn.BatchNorm1d(cont_inp_dim)
        else:
            cont_inp_dim = 0

        # MLP
        input_dim = emb_inp_dim + cont_inp_dim
        mlp_hidden_dims = [input_dim] + mlp_hidden_dims  # type: ignore[assignment, operator]
        self.tab_mlp = MLP(
            mlp_hidden_dims,
            mlp_activation,
            mlp_dropout,
            mlp_batchnorm,
            mlp_batchnorm_last,
            mlp_linear_first,
        )

        # the output_dim attribute will be used as input_dim when "merging" the models
        self.output_dim = mlp_hidden_dims[-1]

    def forward(self, X: Tensor) -> Tensor:  # type: ignore
        r"""Forward pass that concatenates the continuous features with the
        embeddings. The result is then passed through a series of dense layers
        """
        if self.embed_input is not None:
            embed = [
                self.embed_layers["emb_layer_" + col](X[:, self.column_idx[col]].long())
                for col, _, _ in self.embed_input
            ]
            x = torch.cat(embed, 1)
            x = self.embedding_dropout(x)
        if self.continuous_cols is not None:
            cont_idx = [self.column_idx[col] for col in self.continuous_cols]
            x_cont = X[:, cont_idx].float()
            if self.batchnorm_cont:
                x_cont = self.norm(x_cont)
            x = torch.cat([x, x_cont], 1) if self.embed_input is not None else x_cont
        return self.tab_mlp(x)
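

# --- Usage sketch (not part of the library API) ------------------------------
# A minimal, self-contained smoke test mirroring the docstring example above.
# The column names, cardinalities, embedding sizes and random inputs below are
# illustrative assumptions; running this module directly only checks shapes.
if __name__ == "__main__":
    # 4 categorical columns (4 categories each) followed by 1 continuous column
    X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
    colnames = ["a", "b", "c", "d", "e"]
    embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]
    column_idx = {k: v for v, k in enumerate(colnames)}
    model = TabMlp(
        mlp_hidden_dims=[8, 4],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=["e"],
    )
    out = model(X_tab)
    # output_dim equals the last entry of mlp_hidden_dims
    print(out.shape, model.output_dim)  # torch.Size([5, 4]) 4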