import numpy as np
import torch
from torch import nn

from pytorch_widedeep.wdtypes import *  # noqa: F403

allowed_activations = ["relu", "leaky_relu", "gelu"]


def _get_activation_fn(activation):
    if activation == "relu":
        return nn.ReLU(inplace=True)
    elif activation == "leaky_relu":
        return nn.LeakyReLU(inplace=True)
    elif activation == "gelu":
        return nn.GELU()


def dense_layer(
    inp: int,
    out: int,
    activation: str,
    p: float,
    bn: bool,
    linear_first: bool,
):
    # This is basically the LinBnDrop class at the fastai library
    act_fn = _get_activation_fn(activation)
    layers = [nn.BatchNorm1d(out if linear_first else inp)] if bn else []
    if p != 0:
        layers.append(nn.Dropout(p))  # type: ignore[arg-type]
    lin = [nn.Linear(inp, out, bias=not bn), act_fn]
    layers = lin + layers if linear_first else layers + lin
    return nn.Sequential(*layers)


class MLP(nn.Module):
    def __init__(
        self,
        d_hidden: List[int],
        activation: str,
        dropout: Optional[Union[float, List[float]]],
        batchnorm: bool,
        batchnorm_last: bool,
        linear_first: bool,
    ):
        super(MLP, self).__init__()

        if not dropout:
            dropout = [0.0] * len(d_hidden)
        elif isinstance(dropout, float):
            dropout = [dropout] * len(d_hidden)

        self.mlp = nn.Sequential()
        for i in range(1, len(d_hidden)):
            self.mlp.add_module(
                "dense_layer_{}".format(i - 1),
                dense_layer(
                    d_hidden[i - 1],
                    d_hidden[i],
                    activation,
                    dropout[i - 1],
                    # batchnorm is applied to the last dense layer only if
                    # batchnorm_last is True
                    batchnorm and (i != len(d_hidden) - 1 or batchnorm_last),
                    linear_first,
                ),
            )

    def forward(self, X: Tensor) -> Tensor:
        return self.mlp(X)


class TabMlp(nn.Module):
    def __init__(
        self,
        column_idx: Dict[str, int],
        mlp_hidden_dims: List[int] = [200, 100],
        mlp_activation: str = "relu",
        mlp_dropout: Union[float, List[float]] = 0.1,
        mlp_batchnorm: bool = False,
        mlp_batchnorm_last: bool = False,
        mlp_linear_first: bool = False,
        embed_input: Optional[List[Tuple[str, int, int]]] = None,
        embed_dropout: float = 0.1,
        continuous_cols: Optional[List[str]] = None,
        batchnorm_cont: bool = False,
    ):
        r"""Defines a ``TabMlp`` model that can be used as the ``deeptabular``
        component of a Wide & Deep model.

        This class combines embedding representations of the categorical
        features with numerical (aka continuous) features. These are then
        passed through a series of dense layers (i.e. a MLP).

        Parameters
        ----------
        column_idx: Dict
            Dict containing the index of the columns that will be passed
            through the TabMlp model. Required to slice the tensors. e.g.
            {'education': 0, 'relationship': 1, 'workclass': 2, ...}
        mlp_hidden_dims: List, default = [200, 100]
            List with the number of neurons per dense layer in the mlp.
        mlp_activation: str, default = "relu"
            Activation function for the dense layers of the MLP. Currently
            only "relu", "leaky_relu" and "gelu" are supported
        mlp_dropout: float or List, default = 0.1
            float or List of floats with the dropout between the dense
            layers. e.g: [0.5, 0.5]
        mlp_batchnorm: bool, default = False
            Boolean indicating whether or not batch normalization will be
            applied to the dense layers
        mlp_batchnorm_last: bool, default = False
            Boolean indicating whether or not batch normalization will be
            applied to the last of the dense layers
        mlp_linear_first: bool, default = False
            Boolean indicating the order of the operations in the dense
            layer. If ``True: [LIN -> ACT -> BN -> DP]``. If ``False: [BN ->
            DP -> LIN -> ACT]``
        embed_input: List, Optional, default = None
            List of Tuples with the column name, number of unique values and
            embedding dimension. e.g. [(education, 11, 32), ...]
        embed_dropout: float, default = 0.1
            embeddings dropout
        continuous_cols: List, Optional, default = None
            List with the name of the numeric (aka continuous) columns
        batchnorm_cont: bool, default = False
            Boolean indicating whether or not to apply batch normalization to
            the continuous input

        Attributes
        ----------
        mlp: ``nn.Sequential``
            mlp model that will receive the concatenation of the embeddings
            and the continuous columns
        embed_layers: ``nn.ModuleDict``
            ``ModuleDict`` with the embedding layers
        output_dim: int
            The output dimension of the model. This is a required attribute
            necessary to build the WideDeep class

        Example
        --------
        >>> import torch
        >>> from pytorch_widedeep.models import TabMlp
        >>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
        >>> colnames = ['a', 'b', 'c', 'd', 'e']
        >>> embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]
        >>> column_idx = {k:v for v,k in enumerate(colnames)}
        >>> model = TabMlp(mlp_hidden_dims=[8,4], column_idx=column_idx, embed_input=embed_input,
        ... continuous_cols = ['e'])
        >>> out = model(X_tab)
        """
        super(TabMlp, self).__init__()

        self.column_idx = column_idx
        self.mlp_hidden_dims = mlp_hidden_dims
        self.mlp_activation = mlp_activation
        self.mlp_dropout = mlp_dropout
        self.mlp_batchnorm = mlp_batchnorm
        self.mlp_batchnorm_last = mlp_batchnorm_last
        self.mlp_linear_first = mlp_linear_first
        self.embed_input = embed_input
        self.embed_dropout = embed_dropout
        self.continuous_cols = continuous_cols
        self.batchnorm_cont = batchnorm_cont

        if self.mlp_activation not in allowed_activations:
            raise ValueError(
                "Currently, only the following activation functions are supported "
                "for the MLP's dense layers: {}. Got {} instead".format(
                    ", ".join(allowed_activations), self.mlp_activation
                )
            )

        # Embeddings: val + 1 because 0 is reserved for padding/unseen categories.
        if self.embed_input is not None:
            self.embed_layers = nn.ModuleDict(
                {
                    "emb_layer_" + col: nn.Embedding(val + 1, dim, padding_idx=0)
                    for col, val, dim in self.embed_input
                }
            )
            self.embedding_dropout = nn.Dropout(embed_dropout)
            emb_inp_dim = np.sum([embed[2] for embed in self.embed_input])
        else:
            emb_inp_dim = 0  # type: ignore[assignment]

        # Continuous
        if self.continuous_cols is not None:
            cont_inp_dim = len(self.continuous_cols)
            if self.batchnorm_cont:
                self.norm = nn.BatchNorm1d(cont_inp_dim)
        else:
            cont_inp_dim = 0

        # MLP
        input_dim = emb_inp_dim + cont_inp_dim
        mlp_hidden_dims = [input_dim] + mlp_hidden_dims  # type: ignore[assignment, operator]
        self.tab_mlp = MLP(
            mlp_hidden_dims,
            mlp_activation,
            mlp_dropout,
            mlp_batchnorm,
            mlp_batchnorm_last,
            mlp_linear_first,
        )

        # the output_dim attribute will be used as input_dim when "merging" the models
        self.output_dim = mlp_hidden_dims[-1]

    def forward(self, X: Tensor) -> Tensor:  # type: ignore
        r"""Forward pass that concatenates the continuous features with the
        embeddings. The result is then passed through a series of dense layers
        """
        if self.embed_input is not None:
            embed = [
                self.embed_layers["emb_layer_" + col](X[:, self.column_idx[col]].long())
                for col, _, _ in self.embed_input
            ]
            x = torch.cat(embed, 1)
            x = self.embedding_dropout(x)
        if self.continuous_cols is not None:
            cont_idx = [self.column_idx[col] for col in self.continuous_cols]
            x_cont = X[:, cont_idx].float()
            if self.batchnorm_cont:
                x_cont = self.norm(x_cont)
            x = torch.cat([x, x_cont], 1) if self.embed_input is not None else x_cont
        return self.tab_mlp(x)
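

# --- Usage sketch (not part of the library API) ------------------------------
# A minimal, self-contained smoke test mirroring the docstring example above.
# The column names, cardinalities, embedding sizes and random inputs below are
# illustrative assumptions; running this module directly only checks shapes.
if __name__ == "__main__":
    # 4 categorical columns (4 categories each) followed by 1 continuous column
    X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
    colnames = ["a", "b", "c", "d", "e"]
    embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]
    column_idx = {k: v for v, k in enumerate(colnames)}
    model = TabMlp(
        mlp_hidden_dims=[8, 4],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=["e"],
    )
    out = model(X_tab)
    # output_dim equals the last entry of mlp_hidden_dims
    print(out.shape, model.output_dim)  # torch.Size([5, 4]) 4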