Commit bf10a695 authored by: J jrzaurin

Added a few types, refined the docs and added tests for tabnet and the corresponding functionalities
Parent dd2b2141
......@@ -86,7 +86,7 @@ class GLU_Block(nn.Module):
output_dim: int,
n_glu: int = 2,
first: bool = False,
shared_layers: List = None,
shared_layers: nn.ModuleList = None,
ghost_bn: bool = True,
virtual_batch_size: int = 128,
momentum: float = 0.02,
......@@ -142,9 +142,9 @@ class FeatTransformer(nn.Module):
self,
input_dim: int,
output_dim: int,
shared_layers: List,
shared_layers: nn.ModuleList,
n_glu_step_dependent: int,
ghost_bn: bool = True,
ghost_bn=True,
virtual_batch_size=128,
momentum=0.02,
):
......@@ -176,10 +176,10 @@ class FeatTransformer(nn.Module):
class AttentiveTransformer(nn.Module):
def __init__(
self,
input_dim,
output_dim,
mask_type="sparsemax",
ghost_bn: bool = True,
input_dim: int,
output_dim: int,
mask_type: str = "sparsemax",
ghost_bn=True,
virtual_batch_size=128,
momentum=0.02,
):
......@@ -200,7 +200,7 @@ class AttentiveTransformer(nn.Module):
self.mask = sparsemax.Entmax15(dim=-1)
else:
raise NotImplementedError(
"Please choose either sparsemax" + "or entmax as masktype"
"Please choose either 'sparsemax' or 'entmax' as masktype"
)
def forward(self, priors, processed_feat):
......@@ -212,25 +212,25 @@ class AttentiveTransformer(nn.Module):
class TabNetEncoder(nn.Module):
def __init__(
self,
input_dim,
step_dim=8,
attn_dim=8,
n_steps=3,
n_glu_step_dependent=2,
n_glu_shared=2,
ghost_bn=True,
virtual_batch_size=128,
momentum=0.02,
gamma=1.3,
epsilon=1e-15,
mask_type="sparsemax",
input_dim: int,
n_steps: int = 3,
step_dim: int = 8,
attn_dim: int = 8,
n_glu_step_dependent: int = 2,
n_glu_shared: int = 2,
ghost_bn: bool = True,
virtual_batch_size: int = 128,
momentum: float = 0.02,
gamma: float = 1.3,
epsilon: float = 1e-15,
mask_type: str = "sparsemax",
):
super(TabNetEncoder, self).__init__()
self.input_dim = input_dim
self.n_steps = n_steps
self.step_dim = step_dim
self.attn_dim = attn_dim
self.n_steps = n_steps
self.gamma = gamma
self.epsilon = epsilon
......@@ -242,7 +242,7 @@ class TabNetEncoder(nn.Module):
"momentum": momentum,
}
shared_layers = torch.nn.ModuleList()
shared_layers = nn.ModuleList()
for i in range(n_glu_shared):
if i == 0:
shared_layers.append(
......@@ -377,7 +377,7 @@ class EmbeddingsAndContinuous(nn.Module):
else:
cont_out_dim = 0
self.output_dim = emb_out_dim + cont_out_dim
self.output_dim: int = emb_out_dim + cont_out_dim # type: ignore[assignment]
def forward(self, X):
embed = [
......@@ -403,18 +403,95 @@ class TabNet(nn.Module):
embed_dropout: float = 0.0,
continuous_cols: Optional[List[str]] = None,
batchnorm_cont: bool = False,
step_dim=8,
attn_dim=8,
n_steps=3,
n_glu_step_dependent=2,
n_glu_shared=2,
ghost_bn=True,
virtual_batch_size=128,
momentum=0.02,
gamma=1.3,
epsilon=1e-15,
mask_type="sparsemax",
n_steps: int = 3,
step_dim: int = 8,
attn_dim: int = 8,
n_glu_step_dependent: int = 2,
n_glu_shared: int = 2,
ghost_bn: bool = True,
virtual_batch_size: int = 128,
momentum: float = 0.02,
gamma: float = 1.3,
epsilon: float = 1e-15,
mask_type: str = "sparsemax",
):
r"""TabNet model (https://arxiv.org/abs/1908.07442) model that can be used
as the deeptabular component of a Wide & Deep model.
The implementation in this library is fully based on the one at
https://github.com/dreamquark-ai/tabnet, simply adapted so that it can
work within the ``WideDeep`` framework. Therefore, **all credit to the
dreamquark-ai team**.
Parameters
----------
column_idx: Dict
Dictionary where the keys are the columns and the values their
corresponding index
embed_input: List
List of Tuples with the column name, number of unique values and
embedding dimension. e.g. [(education, 11, 32), ...]
embed_dropout: float, default = 0.
embeddings dropout
continuous_cols: List, Optional, default = None
List with the name of the numeric (aka continuous) columns
batchnorm_cont: bool, default = False
Boolean indicating whether or not to apply batch normalization to the
continuous input
n_steps: int, default = 3
number of decision steps
step_dim: int, default = 8
Step's output dimension. This is the output dimension that
``WideDeep`` will collect and connect to the output neuron(s). For
a better understanding of the function of this and the upcoming
parameters, please see the `paper
<https://arxiv.org/abs/1908.07442>`_.
attn_dim: int, default = 8
Attention dimension
n_glu_step_dependent: int, default = 2
number of GLU Blocks [FC -> BN -> GLU] that are step dependent
n_glu_shared: int, default = 2
number of GLU Blocks [FC -> BN -> GLU] that will be shared
across decision steps
ghost_bn: bool, default = True
Boolean indicating if `Ghost Batch Normalization
<https://arxiv.org/abs/1705.08741>`_ will be used.
virtual_batch_size: int, default = 128
Batch size when using Ghost Batch Normalization
momentum: float, default = 0.02
Ghost Batch Normalization's momentum
gamma: float, default = 1.3
Relaxation parameter in the paper. When gamma = 1, a feature is
enforced to be used only at one decision step and as gamma
increases, more flexibility is provided to use a feature at
multiple decision steps
epsilon: float, default = 1e-15
Float to avoid log(0). Always keep low
mask_type: str, default = "sparsemax"
Mask function to use. Either "sparsemax" or "entmax"
Attributes
----------
embed_and_cont: ``nn.ModuleDict``
``ModuleDict`` with the embedding layers and the (optional) normalization
of the continuous columns
TabNetEncoder: ``nn.Module``
``Module`` containing the TabNetEncoder. See the `paper
<https://arxiv.org/abs/1908.07442>`_.
output_dim: int
The output dimension of the model. This is a required attribute
necessary to build the ``WideDeep`` class
Example
--------
>>> import torch
>>> from pytorch_widedeep.models import TabNet
>>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
>>> colnames = ['a', 'b', 'c', 'd', 'e']
>>> embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]
>>> column_idx = {k:v for v,k in enumerate(colnames)}
>>> model = TabNet(column_idx=column_idx, embed_input=embed_input, continuous_cols = ['e'])
"""
super(TabNet, self).__init__()
self.column_idx = column_idx
......@@ -422,9 +499,9 @@ class TabNet(nn.Module):
self.embed_dropout = embed_dropout
self.continuous_cols = continuous_cols
self.batchnorm_cont = batchnorm_cont
self.n_steps = n_steps
self.step_dim = step_dim
self.attn_dim = attn_dim
self.n_steps = n_steps
self.n_glu_step_dependent = n_glu_step_dependent
self.n_glu_shared = n_glu_shared
self.ghost_bn = ghost_bn
......@@ -467,6 +544,16 @@ class TabNet(nn.Module):
class TabNetPredLayer(nn.Module):
def __init__(self, inp, out):
r"""This class is a 'hack' required because TabNet is a very particular
model within ``WideDeep``.
TabNet's forward method within ``WideDeep`` outputs two tensors, one
with the last layer's activations and the other with the sparse
regularization factor. Since the output needs to be collected by
``WideDeep`` to then sequentially build the output layer (connection to
the output neuron(s)), I need to code a custom TabNetPredLayer that
accepts two inputs. This will be used by the ``WideDeep`` class.
"""
super(TabNetPredLayer, self).__init__()
self.pred_layer = nn.Linear(inp, out, bias=False)
initialize_non_glu(self.pred_layer, inp, out)
......
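As a reading aid (illustrative only, not part of the commit), a minimal sketch that re-runs the TabNet docstring example above and unpacks the two tensors that the forward pass returns, as described in the TabNetPredLayer docstring; the printed shape assumes the default step_dim = 8:

import torch
from pytorch_widedeep.models import TabNet

# same toy data as in the docstring example: 4 categorical columns with 4 levels
# each, plus 1 continuous column
X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), dim=1)
colnames = ["a", "b", "c", "d", "e"]
embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4] * 4, [8] * 4)]
column_idx = {k: v for v, k in enumerate(colnames)}
model = TabNet(column_idx=column_idx, embed_input=embed_input, continuous_cols=["e"])

# the forward pass returns two tensors: the steps' output, of shape
# (batch_size, step_dim), and the sparse regularization term
out, sparse_loss = model(X_tab)
print(out.shape)  # torch.Size([5, 8]) with the default step_dim = 8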
......@@ -30,7 +30,8 @@ class WideDeep(nn.Module):
pred_dim: int = 1,
):
r"""Main collector class that combines all ``wide``, ``deeptabular``
(which can be a number of architectures), ``deeptext`` and ``deepimage`` models.
(which can be a number of architectures), ``deeptext`` and
``deepimage`` models.
There are two options to combine these models that correspond to the
two main architectures (there is a higher number of
......@@ -51,9 +52,9 @@ class WideDeep(nn.Module):
:class:`pytorch_widedeep.models.wide.Wide`
deeptabular: ``nn.Module``, Optional, default = None
currently ``pytorch-widedeep`` implements three possible
currently ``pytorch-widedeep`` implements four possible
architectures for the `deeptabular` component. These are:
``TabMlp``, ``TabResnet`` and ``TabTransformer``.
``TabMlp``, ``TabResnet``, ``TabNet`` and ``TabTransformer``.
1. ``TabMlp`` is simply an embedding layer encoding the categorical
features that are then concatenated and passed through a series of
......@@ -65,14 +66,18 @@ class WideDeep(nn.Module):
ResNet blocks formed by dense layers.
See ``pytorch_widedeep.models.deep_dense_resnet.TabResnet``
3. ``TabNet`` is detailed in `TabNet: Attentive Interpretable Tabular
Learning <https://arxiv.org/abs/1908.07442>`_. See
``pytorch_widedeep.models.tabnet.tab_net.TabNet``
4. ``TabTransformer`` is detailed in `TabTransformer: Tabular Data
Modeling Using Contextual Embeddings
<https://arxiv.org/pdf/2012.06678.pdf>`_. See
<https://arxiv.org/abs/2012.06678>`_. See
``pytorch_widedeep.models.tab_transformer.TabTransformer``
I recommend using on of these as ``deeptabular``. However, a
custom model as long as is consistent with the required
architecture. See
I recommend using one of these as ``deeptabular``. However, it is
possible to use a custom model as long as it is consistent with the
required architecture. See
:class:`pytorch_widedeep.models.deep_dense.TabTransformer`.
deeptext: ``nn.Module``, Optional, default = None
......@@ -323,10 +328,12 @@ class WideDeep(nn.Module):
warnings.warn(
"'WideDeep' is a model comprised by multiple components and the 'deeptabular'"
" component is 'TabNet'. We recommend using 'TabNet' in isolation."
" This is because 'TabNet' uses sparse regularization which partially losses"
" The reasons are: i)'TabNet' uses sparse regularization which partially losses"
" its purpose when used in combination with other components."
" If you still want to use a multiple component model with 'TabNet',"
" consider setting 'lambda_sparse' to 0 during training",
" consider setting 'lambda_sparse' to 0 during training. ii) The feature"
" importances will be computed only for TabNet but the model will comprise multiple"
" components. Therefore, such importances will partially lose their 'meaning'.",
UserWarning,
)
if deeptext is not None and not hasattr(deeptext, "output_dim"):
......
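To illustrate the warning above, a hedged sketch of the multi-component case it refers to (illustrative, not part of the commit; it assumes the sparse regularization weight is exposed as a ``lambda_sparse`` argument of the Trainer, which is the setting the warning suggests turning off):

import numpy as np
from pytorch_widedeep.models import Wide, TabNet, WideDeep
from pytorch_widedeep.training import Trainer

colnames = ["a", "b", "c", "d", "e"]
embed_input = [(c, 4, 8) for c in colnames[:4]]
column_idx = {k: v for v, k in enumerate(colnames)}

# toy data: a wide input, 4 categorical + 1 continuous tabular columns, binary target
X_wide = np.random.randint(0, 10, (32, 5))
X_tab = np.hstack([np.random.randint(0, 4, (32, 4)), np.random.rand(32, 1)])
target = np.random.randint(0, 2, 32)

wide = Wide(10, 1)
tabnet = TabNet(column_idx=column_idx, embed_input=embed_input, continuous_cols=["e"])
model = WideDeep(wide=wide, deeptabular=tabnet)  # emits the UserWarning above

# lambda_sparse=0.0 disables the sparse regularization, as the warning advises
# (assumes the Trainer accepts a lambda_sparse argument)
trainer = Trainer(model, objective="binary", lambda_sparse=0.0, verbose=0)
trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)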
import pandas as pd
import pytest
from sklearn.exceptions import NotFittedError
from pytorch_widedeep.preprocessing.base_preprocessor import (
BasePreprocessor,
check_is_fitted,
)
df = pd.DataFrame({"col1": ["a", "b", "c", "d", "e"], "col2": [1, 2, 3, 4, 5]})
class DummyPreprocessor(BasePreprocessor):
def __init__(self):
super().__init__()
def fit(self, df):
self.att1 = 1
self.att2 = 2
return df
def transform(self, df):
check_is_fitted(self, attributes=["att1", "att2"], all_or_any="any")
return df
def fit_transform(self, df):
return self.fit(df).transform(df)
class IncompletePreprocessor(BasePreprocessor):
def __init__(self):
super().__init__()
def fit(self, df):
return df
def transform(self, df):
return df
###############################################################################
#  test check_is_fitted with "any"
###############################################################################
def test_check_is_fitted():
dummy_preprocessor = DummyPreprocessor()
with pytest.raises(NotFittedError):
dummy_preprocessor.transform(df)
###############################################################################
#  test base_preprocessor raising NotImplemented error
###############################################################################
def test_base_non_implemented_error():
with pytest.raises(NotImplementedError):
incomplete_preprocessor = IncompletePreprocessor() # noqa: F841
incomplete_preprocessor.fit_transform(df)
......@@ -5,6 +5,7 @@ from sklearn.exceptions import NotFittedError
from pytorch_widedeep.preprocessing import TabPreprocessor
from pytorch_widedeep.utils.deeptabular_utils import LabelEncoder
from pytorch_widedeep.preprocessing.tab_preprocessor import embed_sz_rule
def create_test_dataset(input_type, input_type_2=None):
......@@ -225,3 +226,27 @@ def test_notfittederror():
)
with pytest.raises(NotFittedError):
processor.transform(df)
###############################################################################
# Test embeddings fastai's rule of thumb
###############################################################################
def test_embed_sz_rule_of_thumb():
embed_cols = ["col1", "col2"]
df = pd.DataFrame(
{
"col1": np.random.randint(10, size=100),
"col2": np.random.randint(20, size=100),
}
)
n_cats = {c: df[c].nunique() for c in ["col1", "col2"]}
embed_szs = {c: embed_sz_rule(nc) for c, nc in n_cats.items()}
tab_preprocessor = TabPreprocessor(embed_cols=embed_cols)
tdf = tab_preprocessor.fit_transform(df) # noqa: F841
out = [
tab_preprocessor.embed_dim[col] == embed_szs[col] for col in embed_szs.keys()
]
assert all(out)
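For context, the rule of thumb being tested is fastai's embedding-size heuristic. A common formulation is sketched below; the exact constants used by embed_sz_rule are not shown in this diff, so this is an assumption for illustration only:

# assumed fastai-style embedding size heuristic (not necessarily the library's exact code)
def embed_sz_rule_sketch(n_cat: int) -> int:
    return min(600, round(1.6 * n_cat ** 0.56))

print(embed_sz_rule_sketch(10))  # -> 6
print(embed_sz_rule_sketch(20))  # -> 9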
import string
import numpy as np
import torch
import pytest
from pytorch_widedeep.wdtypes import WideDeep
from pytorch_widedeep.models.tabnet.tab_net import TabNet # noqa: F403
from pytorch_widedeep.models.tabnet.tabnet_utils import create_explain_matrix
# I am going to test this model extensively due to the number of components
n_embed = 5
#  this is the number of embed_cols and cont_cols. So total num of cols =
#  n_cols * 2
n_cols = 2
batch_size = 10
colnames = list(string.ascii_lowercase)[: (n_cols * 2)]
embed_cols = [np.random.choice(np.arange(n_embed), batch_size) for _ in range(n_cols)]
cont_cols = [np.random.rand(batch_size) for _ in range(n_cols)]
X_tab = torch.from_numpy(np.vstack(embed_cols + cont_cols).transpose())
X_tab_emb = X_tab[:, :n_cols]
X_tab_cont = X_tab[:, n_cols:]
###############################################################################
# Test functioning using the defaults
###############################################################################
embed_input = [(u, i, 1) for u, i in zip(colnames[:2], [n_embed] * 2)]
model1 = TabNet(
column_idx={k: v for v, k in enumerate(colnames)},
embed_input=embed_input,
continuous_cols=colnames[n_cols:],
)
def test_embeddings_have_padding():
res = []
for k, v in model1.embed_and_cont.embed_layers.items():
res.append(v.weight.size(0) == n_embed + 1)
res.append(not torch.all(v.weight[0].bool()))
assert all(res)
def test_tabnet_output():
out1, out2 = model1(X_tab)
assert out1.size(0) == 10 and out1.size(1) == model1.step_dim
###############################################################################
# Test functioning with different types of masks
###############################################################################
@pytest.mark.parametrize(
"mask_type",
[
"sparsemax",
"entmax",
],
)
def test_mask_type(mask_type):
model = TabNet(
column_idx={k: v for v, k in enumerate(colnames)},
embed_input=embed_input,
continuous_cols=colnames[n_cols:],
mask_type=mask_type,
)
out1, out2 = model(X_tab)
assert out1.size(0) == 10 and out1.size(1) == model.step_dim
###############################################################################
# Test functioning with/without ghost BN
###############################################################################
@pytest.mark.parametrize(
"ghost_bn",
[
True,
False,
],
)
def test_ghost_bn(ghost_bn):
model = TabNet(
column_idx={k: v for v, k in enumerate(colnames)},
embed_input=embed_input,
continuous_cols=colnames[n_cols:],
ghost_bn=ghost_bn,
)
out1, out2 = model(X_tab)
assert out1.size(0) == 10 and out1.size(1) == model.step_dim
###############################################################################
# Test forward_mask method
###############################################################################
def test_forward_masks():
out1, out2 = model1.forward_masks(X_tab)
bsz, nfeat = X_tab.shape[0], X_tab.shape[1]
out = []
out.append(out1.shape[0] == bsz)
out.append(out1.shape[1] == nfeat)
for step in range(model1.n_steps):
out.append(out2[step].size(0) == bsz)
out.append(out2[step].size(1) == nfeat)
assert all(out)
###############################################################################
# Test create_explain_matrix
###############################################################################
def test_create_explain_matrix():
embed_input = [(u, i, 2) for u, i in zip(colnames[:2], [n_embed] * 2)]
continuous_cols = colnames[2:]
embed_cols = colnames[:2]
column_idx = {k: v for v, k in enumerate(colnames)}
tabnet = TabNet(
column_idx=column_idx,
embed_input=embed_input,
continuous_cols=continuous_cols,
)
wdmodel = WideDeep(deeptabular=tabnet)
expl_mtx = create_explain_matrix(wdmodel)
checks = []
checks.append(expl_mtx.sum() == tabnet.embed_and_cont_dim)
checks.append(all(expl_mtx.sum(1) == 1))
for col, idx in column_idx.items():
if col in embed_cols:
checks.append(expl_mtx[:, idx].sum() == 2.0)
elif col in continuous_cols:
checks.append(expl_mtx[:, idx].sum() == 1.0)
assert all(checks)
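As a reading aid for the checks above, a small hand-built sketch of what such a reducing matrix looks like (an illustration of the properties the test asserts, not the library's create_explain_matrix implementation): for two embedded columns with embedding dimension 2 and two continuous columns, it maps the 6 post-embedding features back to the 4 original columns.

import numpy as np

# rows: post-embedding features; columns: original input columns ('a', 'b', 'c', 'd')
reduce_mtx = np.array(
    [
        [1.0, 0.0, 0.0, 0.0],  # 1st embedding dim of column 'a'
        [1.0, 0.0, 0.0, 0.0],  # 2nd embedding dim of column 'a'
        [0.0, 1.0, 0.0, 0.0],  # 1st embedding dim of column 'b'
        [0.0, 1.0, 0.0, 0.0],  # 2nd embedding dim of column 'b'
        [0.0, 0.0, 1.0, 0.0],  # continuous column 'c'
        [0.0, 0.0, 0.0, 1.0],  # continuous column 'd'
    ]
)
assert reduce_mtx.sum() == 6          # total post-embedding dimension (2*2 + 2)
assert all(reduce_mtx.sum(1) == 1)    # each feature maps to exactly one original column
assert reduce_mtx[:, 0].sum() == 2.0  # an embedded column accounts for its embedding dim
assert reduce_mtx[:, 2].sum() == 1.0  # a continuous column accounts for 1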
......@@ -3,14 +3,20 @@ from copy import deepcopy
import pytest
from torch import nn
from pytorch_widedeep.models import Wide, TabMlp, DeepText, WideDeep, DeepImage
from pytorch_widedeep.models import (
Wide,
TabMlp,
TabNet,
DeepText,
WideDeep,
DeepImage,
)
embed_input = [(u, i, j) for u, i, j in zip(["a", "b", "c"][:4], [4] * 3, [8] * 3)]
column_idx = {k: v for v, k in enumerate(["a", "b", "c"])}
wide = Wide(10, 1)
deepdense = TabMlp(
mlp_hidden_dims=[16, 8], column_idx=column_idx, embed_input=embed_input
)
tabmlp = TabMlp(mlp_hidden_dims=[16, 8], column_idx=column_idx, embed_input=embed_input)
tabnet = TabNet(column_idx=column_idx, embed_input=embed_input)
deeptext = DeepText(vocab_size=100, embed_dim=8)
deepimage = DeepImage(pretrained=False)
......@@ -29,16 +35,16 @@ deepimage = DeepImage(pretrained=False)
)
def test_history_callback(deepcomponent, component_name):
if deepcomponent is None:
deepcomponent = deepcopy(deepdense)
deepcomponent = deepcopy(tabmlp)
deepcomponent.__dict__.pop("output_dim")
with pytest.raises(AttributeError):
if component_name == "dense":
model = WideDeep(wide, deeptabular=deepcomponent)
elif component_name == "text":
model = WideDeep(wide, deeptabular=deepdense, deeptext=deepcomponent)
model = WideDeep(wide, deeptabular=tabmlp, deeptext=deepcomponent)
elif component_name == "image":
model = WideDeep( # noqa: F841
wide, deeptabular=deepdense, deepimage=deepcomponent
wide, deeptabular=tabmlp, deepimage=deepcomponent
)
......@@ -52,7 +58,7 @@ def test_deephead_and_head_layers_dim():
with pytest.raises(ValueError):
model = WideDeep( # noqa: F841
wide=wide,
deeptabular=deepdense,
deeptabular=tabmlp,
head_hidden_dims=[16, 8],
deephead=deephead,
)
......@@ -66,7 +72,7 @@ def test_deephead_and_head_layers_dim():
def test_no_deephead_and_head_layers_dim():
out = []
model = WideDeep(
wide=wide, deeptabular=deepdense, head_hidden_dims=[8, 4]
wide=wide, deeptabular=tabmlp, head_hidden_dims=[8, 4]
) # noqa: F841
for n, p in model.named_parameters():
if n == "deephead.head_layer_0.0.weight":
......@@ -74,3 +80,13 @@ def test_no_deephead_and_head_layers_dim():
if n == "deephead.head_layer_1.0.weight":
out.append(p.size(0) == 4 and p.size(1) == 8)
assert all(out)
###############################################################################
#  test tabnet warning
###############################################################################
def test_tabnet_warning():
with pytest.warns(UserWarning):
model = WideDeep(wide=wide, deeptabular=tabnet) # noqa: F841
import string
import warnings
import numpy as np
import pytest
from torch import nn
from pytorch_widedeep.models import Wide, TabMlp, WideDeep, TabTransformer
from pytorch_widedeep.models import (
Wide,
TabMlp,
TabNet,
WideDeep,
TabTransformer,
)
from pytorch_widedeep.metrics import R2Score
from pytorch_widedeep.training import Trainer
......@@ -138,6 +145,51 @@ def test_fit_objectives_tab_transformer(
assert preds.shape[0] == 32, probs.shape[1] == probs_dim
##############################################################################
# Repeat 1st set of tests with TabNet
##############################################################################
@pytest.mark.parametrize(
"X_wide, X_tab, target, objective, X_wide_test, X_tab_test, X_test, pred_dim, probs_dim",
[
(X_wide, X_tab, target_regres, "regression", X_wide, X_tab, None, 1, None),
(X_wide, X_tab, target_binary, "binary", X_wide, X_tab, None, 1, 2),
(X_wide, X_tab, target_multic, "multiclass", X_wide, X_tab, None, 3, 3),
(X_wide, X_tab, target_regres, "regression", None, None, X_test, 1, None),
(X_wide, X_tab, target_binary, "binary", None, None, X_test, 1, 2),
(X_wide, X_tab, target_multic, "multiclass", None, None, X_test, 3, 3),
],
)
def test_fit_objectives_tabnet(
X_wide,
X_tab,
target,
objective,
X_wide_test,
X_tab_test,
X_test,
pred_dim,
probs_dim,
):
warnings.filterwarnings("ignore")
wide = Wide(np.unique(X_wide).shape[0], pred_dim)
tabnet = TabNet(
column_idx={k: v for v, k in enumerate(colnames)},
embed_input=embed_input,
continuous_cols=colnames[5:],
)
model = WideDeep(wide=wide, deeptabular=tabnet, pred_dim=pred_dim)
trainer = Trainer(model, objective=objective, verbose=0)
trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
if objective == "binary":
pass
else:
probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
assert preds.shape[0] == 32, probs.shape[1] == probs_dim
##############################################################################
# Test fit with R2 for regression
##############################################################################
......
......@@ -10,6 +10,7 @@ from sklearn.model_selection import train_test_split
from pytorch_widedeep.models import (
Wide,
TabMlp,
TabNet,
DeepText,
WideDeep,
DeepImage,
......@@ -78,6 +79,12 @@ tabtransformer = TabTransformer(
embed_input=embed_input_tt,
continuous_cols=colnames[5:],
)
tabnet = TabNet(
column_idx={k: v for v, k in enumerate(colnames)},
embed_input=embed_input,
continuous_cols=colnames[5:],
ghost_bn=False,
)
deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
deepimage = DeepImage(pretrained=True)
......@@ -268,3 +275,31 @@ def test_save_and_load_dict():
shutil.rmtree("tests/test_model_functioning/model_dir/")
assert torch.allclose(wide_weights, n_wide_weights)
###############################################################################
#  test explain matrices and feature importance for TabNet
###############################################################################
def test_explain_mtx_and_feat_imp():
model = WideDeep(deeptabular=tabnet)
trainer = Trainer(model, objective="binary", verbose=0)
trainer.fit(
X_tab=X_tab,
target=target,
batch_size=16,
)
checks = []
checks.append(len(trainer.feature_importance) == len(tabnet.column_idx))
expl_mtx, step_masks = trainer.explain(X_tab[:6], save_step_masks=True)
checks.append(expl_mtx.shape[0] == 6)
checks.append(expl_mtx.shape[1] == 10)
for i in range(tabnet.n_steps):
checks.append(step_masks[i].shape[0] == 6)
checks.append(step_masks[i].shape[1] == 10)
assert all(checks)