Commit e2cf20e1 authored by: P Pavol Mulinka

added embedding rules, Monte Carlo (uncertainty) prediction and removed running of the tests on draft requests
Parent c287c870
......@@ -11,6 +11,7 @@ on:
jobs:
  codestyle:
    runs-on: ubuntu-latest
    if: ${{ github.event_name == 'push' || !github.event.pull_request.draft }}
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.9
......@@ -32,6 +33,7 @@ jobs:
  test:
    runs-on: ubuntu-latest
    if: ${{ github.event_name == 'push' || !github.event.pull_request.draft }}
    strategy:
      fail-fast: true
      matrix:
......@@ -59,6 +61,7 @@ jobs:
  finish:
    needs: test
    runs-on: ubuntu-latest
    if: ${{ github.event_name == 'push' || !github.event.pull_request.draft }}
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.9
......
1.0.11
\ No newline at end of file
1.0.12
\ No newline at end of file
This diff is collapsed.
......@@ -12,10 +12,24 @@ from pytorch_widedeep.preprocessing.base_preprocessor import (
)
def embed_sz_rule(n_cat):
    r"""Rule of thumb to pick embedding size corresponding to ``n_cat``. Taken
    from fastai's Tabular API"""
    return min(600, round(1.6 * n_cat ** 0.56))
def embed_sz_rule(n_cat: int, embedding_rule: str = "fastai_new") -> int:
    r"""Rule of thumb to pick the embedding size corresponding to ``n_cat``. The
    default rule is taken from the current fastai Tabular API. The function also
    includes the rule previously used by fastai and the rule included in Google's
    TensorFlow documentation

    Parameters
    ----------
    n_cat: int
        number of unique categorical values in a feature
    embedding_rule: str, default = fastai_new
        rule of thumb to be used for the embedding vector size
    """
    if embedding_rule == "google":
        return int(round(n_cat ** 0.25))
    elif embedding_rule == "fastai_old":
        return int(min(50, (n_cat // 2) + 1))
    else:
        return int(min(600, round(1.6 * n_cat ** 0.56)))
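For illustration, the three rules give quite different sizes for the same cardinality. A quick sketch using the function above (assuming the module path pytorch_widedeep.preprocessing.tab_preprocessor):

from pytorch_widedeep.preprocessing.tab_preprocessor import embed_sz_rule

# compare the three rules for a feature with 1000 unique categories
for rule in ("fastai_new", "fastai_old", "google"):
    print(rule, embed_sz_rule(1000, embedding_rule=rule))
# fastai_new -> min(600, round(1.6 * 1000 ** 0.56)) = 77
# fastai_old -> min(50, 1000 // 2 + 1)              = 50
# google     -> round(1000 ** 0.25)                 = 6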
class TabPreprocessor(BasePreprocessor):
......@@ -38,8 +52,15 @@ class TabPreprocessor(BasePreprocessor):
        :obj:`pytorch_widedeep.models.transformers._embedding_layers`
    auto_embed_dim: bool, default = True
        Boolean indicating whether the embedding dimensions will be
        automatically defined via a rule of thumb
    embedding_rule: str, default = 'fastai_new'
        choice of embedding rule of thumb:
        'fastai_new':
            :math:`min(600, round(1.6 \times n_{cat}^{0.56}))`
        'fastai_old':
            :math:`min(50, (n_{cat}//{2})+1)`
        'google':
            :math:`round(n_{cat}^{0.25})`
    default_embed_dim: int, default=16
        Dimension for the embeddings used for the ``deeptabular``
        component if the embed_dim is not provided in the ``embed_cols``
......@@ -118,6 +139,7 @@ class TabPreprocessor(BasePreprocessor):
        continuous_cols: List[str] = None,
        scale: bool = True,
        auto_embed_dim: bool = True,
        embedding_rule: str = "fastai_new",
        default_embed_dim: int = 16,
        already_standard: List[str] = None,
        for_transformer: bool = False,
......@@ -131,6 +153,7 @@ class TabPreprocessor(BasePreprocessor):
        self.continuous_cols = continuous_cols
        self.scale = scale
        self.auto_embed_dim = auto_embed_dim
        self.embedding_rule = embedding_rule
        self.default_embed_dim = default_embed_dim
        self.already_standard = already_standard
        self.for_transformer = for_transformer
......@@ -250,7 +273,7 @@ class TabPreprocessor(BasePreprocessor):
            embed_colname = [emb[0] for emb in self.embed_cols]
        elif self.auto_embed_dim:
            n_cats = {col: df[col].nunique() for col in self.embed_cols}
            self.embed_dim = {col: embed_sz_rule(n_cat) for col, n_cat in n_cats.items()}  # type: ignore[misc]
            self.embed_dim = {col: embed_sz_rule(n_cat, self.embedding_rule) for col, n_cat in n_cats.items()}  # type: ignore[misc]
            embed_colname = self.embed_cols  # type: ignore
        else:
            self.embed_dim = {e: self.default_embed_dim for e in self.embed_cols}  # type: ignore
......
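A minimal usage sketch of the new option (the dataframe and column names below are hypothetical):

import pandas as pd
from pytorch_widedeep.preprocessing import TabPreprocessor

# hypothetical dataframe with one categorical and one continuous column
df = pd.DataFrame({"city": ["a", "b", "c", "a"], "age": [23, 45, 31, 60]})

# let the preprocessor pick embedding sizes with the 'google' rule
tab_preprocessor = TabPreprocessor(
    embed_cols=["city"],
    continuous_cols=["age"],
    auto_embed_dim=True,
    embedding_rule="google",
)
X_tab = tab_preprocessor.fit_transform(df)
print(tab_preprocessor.embed_dim)  # {'city': 1}, since round(3 ** 0.25) == 1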
......@@ -27,7 +27,7 @@ from pytorch_widedeep.callbacks import (
from pytorch_widedeep.dataloaders import DataLoaderDefault
from pytorch_widedeep.initializers import Initializer, MultipleInitializer
from pytorch_widedeep.training._finetune import FineTune
from pytorch_widedeep.utils.general_utils import Alias
from pytorch_widedeep.utils.general_utils import Alias, set_default_attr
from pytorch_widedeep.models.tabnet._utils import create_explain_matrix
from pytorch_widedeep.training._wd_dataset import WideDeepDataset
from pytorch_widedeep.training._trainer_utils import (
......@@ -685,8 +685,14 @@ class Trainer:
If a trainer is used to predict after having trained a model, the
``batch_size`` needs to be defined as it will not be defined as
the :obj:`Trainer` is instantiated
uncertainty: bool, default = False
If set to True the model activates the dropout layers and predicts
the each sample N times (uncertainty_granularity times) and returns
{max, min, mean, stdev} value for each sample
uncertainty_granularity: int default = 1000
number of times the model does prediction for each sample if uncertainty
is set to True
"""
        preds_l = self._predict(X_wide, X_tab, X_text, X_img, X_test, batch_size)
        if self.method == "regression":
            return np.vstack(preds_l).squeeze(1)
......@@ -697,6 +703,86 @@ class Trainer:
        preds = np.vstack(preds_l)
        return np.argmax(preds, 1)  # type: ignore[return-value]
    def predict_uncertainty(  # type: ignore[return]
        self,
        X_wide: Optional[np.ndarray] = None,
        X_tab: Optional[np.ndarray] = None,
        X_text: Optional[np.ndarray] = None,
        X_img: Optional[np.ndarray] = None,
        X_test: Optional[Dict[str, np.ndarray]] = None,
        batch_size: int = 256,
        uncertainty_granularity: int = 1000,
    ) -> np.ndarray:
r"""Returns the predicted ucnertainty of the model for the test dataset using a
Monte Carlo method during which dropout layers are activated in the evaluation/prediction
phase and each sample is predicted N times (uncertainty_granularity times). Based on [1].
[1] Gal Y. & Ghahramani Z., 2016, Dropout as a Bayesian Approximation: Representing Model
Uncertainty in Deep Learning, Proceedings of the 33rd International Conference on Machine Learning
        Parameters
        ----------
        X_wide: np.ndarray, Optional. default=None
            Input for the ``wide`` model component.
            See :class:`pytorch_widedeep.preprocessing.WidePreprocessor`
        X_tab: np.ndarray, Optional. default=None
            Input for the ``deeptabular`` model component.
            See :class:`pytorch_widedeep.preprocessing.TabPreprocessor`
        X_text: np.ndarray, Optional. default=None
            Input for the ``deeptext`` model component.
            See :class:`pytorch_widedeep.preprocessing.TextPreprocessor`
        X_img: np.ndarray, Optional. default=None
            Input for the ``deepimage`` model component.
            See :class:`pytorch_widedeep.preprocessing.ImagePreprocessor`
        X_test: Dict, Optional. default=None
            The test dataset can also be passed in a dictionary. Keys are
            `'X_wide'`, `'X_tab'`, `'X_text'`, `'X_img'` and `'target'`. Values
            are the corresponding matrices.
        batch_size: int, default = 256
            If a trainer is used to predict after having trained a model, the
            ``batch_size`` needs to be defined as it will not be defined when
            the :obj:`Trainer` is instantiated
        uncertainty_granularity: int, default = 1000
            number of times the model predicts each sample
        Returns
        -------
        method == regression : np.ndarray
            {max, min, mean, stdev} values for each sample
        method == binary : np.ndarray
            {mean_cls_0_prob, mean_cls_1_prob, predicted_cls} values for each sample
        method == multiclass : np.ndarray
            {mean_cls_0_prob, mean_cls_1_prob, mean_cls_2_prob, ..., predicted_cls}
            values for each sample
        """
        preds_l = self._predict(
            X_wide, X_tab, X_text, X_img, X_test, batch_size,
            uncertainty_granularity, uncertainty=True,
        )
        preds = np.vstack(preds_l)
        # _predict stacks uncertainty_granularity stochastic forward passes on
        # top of each other; recover the number of actual samples
        samples_num = int(preds.shape[0] / uncertainty_granularity)
        if self.method == "regression":
            preds = preds.squeeze(1)
            preds = preds.reshape((uncertainty_granularity, samples_num))
            # summary statistics over the Monte Carlo dimension
            return np.array(
                (preds.max(axis=0), preds.min(axis=0), preds.mean(axis=0), preds.std(axis=0))
            ).T
        if self.method == "binary":
            preds = preds.squeeze(1)
            preds = preds.reshape((uncertainty_granularity, samples_num))
            preds = preds.mean(axis=0)
            probs = np.zeros([preds.shape[0], 3])
            probs[:, 0] = 1 - preds
            probs[:, 1] = preds
            # class predicted from the mean of the Monte Carlo probabilities
            probs[:, 2] = (preds > 0.5).astype("int")
            return probs
        if self.method == "multiclass":
            preds = preds.reshape(uncertainty_granularity, samples_num, preds.shape[1])
            preds = preds.mean(axis=0)
            # append the predicted class as the last column
            preds = np.hstack((preds, np.vstack(np.argmax(preds, 1))))
            return preds
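A usage sketch (assuming a `trainer` already fitted on a binary problem and an `X_tab` matrix produced by a fitted TabPreprocessor):

# 100 stochastic forward passes per sample
probs = trainer.predict_uncertainty(
    X_tab=X_tab,
    batch_size=256,
    uncertainty_granularity=100,
)
# probs.shape == (n_samples, 3):
#   column 0: mean probability of class 0
#   column 1: mean probability of class 1
#   column 2: predicted class (1 if the mean probability of class 1 > 0.5)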
    def predict_proba(  # type: ignore[return]
        self,
        X_wide: Optional[np.ndarray] = None,
......@@ -1112,6 +1198,8 @@ class Trainer:
        X_img: Optional[np.ndarray] = None,
        X_test: Optional[Dict[str, np.ndarray]] = None,
        batch_size: int = 256,
        uncertainty_granularity: int = 1000,
        uncertainty: bool = False,
    ) -> List:
r"""Private method to avoid code repetition in predict and
predict_proba. For parameter information, please, see the .predict()
......@@ -1144,20 +1232,33 @@ class Trainer:
        self.model.eval()
        preds_l = []

        if uncertainty:
            # activate only the dropout layers while the rest of the model
            # stays in eval mode (Monte Carlo dropout)
            for m in self.model.modules():
                if m.__class__.__name__.startswith("Dropout"):
                    m.train()
            prediction_iters = uncertainty_granularity
        else:
            prediction_iters = 1
        with torch.no_grad():
            with trange(test_steps, disable=self.verbose != 1) as t:
                for i, data in zip(t, test_loader):
                    t.set_description("predict")
                    X = {k: v.cuda() for k, v in data.items()} if use_cuda else data
                    preds = (
                        self.model(X) if not self.model.is_tabnet else self.model(X)[0]
                    )
                    if self.method == "binary":
                        preds = torch.sigmoid(preds)
                    if self.method == "multiclass":
                        preds = F.softmax(preds, dim=1)
                    preds = preds.cpu().data.numpy()
                    preds_l.append(preds)
            with trange(uncertainty_granularity, disable=not uncertainty) as t:
                # the outer loop runs once for a standard prediction and
                # uncertainty_granularity times for Monte Carlo dropout
                for i, _ in zip(t, range(prediction_iters)):
                    t.set_description("predict_UncertaintyIter")
                    with trange(
                        test_steps, disable=self.verbose != 1 or uncertainty
                    ) as tt:
                        for j, data in zip(tt, test_loader):
                            tt.set_description("predict")
                            X = {k: v.cuda() for k, v in data.items()} if use_cuda else data
                            preds = (
                                self.model(X) if not self.model.is_tabnet else self.model(X)[0]
                            )
                            if self.method == "binary":
                                preds = torch.sigmoid(preds)
                            if self.method == "multiclass":
                                preds = F.softmax(preds, dim=1)
                            preds = preds.cpu().data.numpy()
                            preds_l.append(preds)
        self.model.train()
        return preds_l
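The Monte Carlo dropout mechanism itself is easy to reproduce outside the Trainer; a minimal, self-contained sketch with a toy model (all names here are illustrative):

import torch
import torch.nn as nn

# toy model with a dropout layer, standing in for any WideDeep component
model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Dropout(0.5), nn.Linear(16, 1))

model.eval()  # keep batch norm, etc. in inference mode
for m in model.modules():
    if m.__class__.__name__.startswith("Dropout"):
        m.train()  # re-enable stochastic dropout only

x = torch.randn(4, 8)
with torch.no_grad():
    # repeated forward passes now differ because dropout stays active
    mc_preds = torch.stack([model(x) for _ in range(100)])
print(mc_preds.mean(dim=0), mc_preds.std(dim=0))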
......
__version__ = "1.0.11"
__version__ = "1.0.12"