Commit dff1930a authored by Pavol Mulinka

added activation to head

Parent 9d2f25a7
@@ -15,13 +15,13 @@ class TweedieLoss(nn.Module):
     <https://arxiv.org/abs/1811.10192>`
     """

-    def __init__():
+    def __init__(self):
         super().__init__()

     def forward(self, input: Tensor, target: Tensor, p=1.5) -> Tensor:
-        loss = - y * torch.pow(y_hat, 1 - p) / (1 - p) + \
-            torch.pow(y_hat, 2 - p) / (2 - p)
+        loss = - target * torch.pow(input, 1 - p) / (1 - p) + \
+            torch.pow(input, 2 - p) / (2 - p)
         return torch.mean(loss)
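The fix above replaces the undefined names `y` and `y_hat` with the actual `forward` arguments. A minimal usage sketch for context; the import path `pytorch_widedeep.losses` and the toy data are assumptions for illustration:

```python
import torch
from pytorch_widedeep.losses import TweedieLoss  # assumed import path

loss_fn = TweedieLoss()
y_pred = torch.rand(16) + 0.1  # must be strictly positive for 1 < p < 2
y_true = torch.rand(16)
loss = loss_fn(y_pred, y_true, p=1.5)  # scalar mean Tweedie deviance
```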
@@ -16,7 +16,7 @@
 import torch
 import torch.nn as nn
 from pytorch_widedeep.wdtypes import *  # noqa: F403
-from pytorch_widedeep.models.tab_mlp import MLP
+from pytorch_widedeep.models.tab_mlp import MLP, get_activation_fn
 from pytorch_widedeep.models.tabnet.tab_net import TabNetPredLayer

 warnings.filterwarnings("default", category=UserWarning)
@@ -87,6 +87,10 @@ class WideDeep(nn.Module):
         the order of the operations in the dense layer. If ``True``:
         ``[LIN -> ACT -> BN -> DP]``. If ``False``: ``[BN -> DP -> LIN ->
         ACT]``
+    head_activation_last: bool, default = False
+        Whether the final head layer ends with an activation function.
+        Important for loss functions that restrict their input to
+        non-negative values, e.g. RMSLE, or when predictions lie in [0, inf)
     pred_dim: int, default = 1
         Size of the final wide and deep output layer containing the
         predictions. `1` for regression and binary classification or number
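For context, a sketch of how the new flag would be passed at construction time. The `TabMlp` column setup, the `head_activation` argument, and the forward-call signature shown here are illustrative assumptions, not part of this commit:

```python
import torch
from pytorch_widedeep.models import TabMlp, WideDeep  # assumed import paths

# hypothetical tabular component, just to give WideDeep something to wrap
tab_mlp = TabMlp(
    column_idx={"a": 0, "b": 1},
    continuous_cols=["a", "b"],
    mlp_hidden_dims=[16, 8],
)
model = WideDeep(
    deeptabular=tab_mlp,
    head_hidden_dims=[8],
    head_activation="relu",
    head_activation_last=True,  # keep raw outputs in [0, inf)
)
out = model({"deeptabular": torch.rand(4, 2)})  # non-negative by construction
```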
@@ -131,6 +135,7 @@ class WideDeep(nn.Module):
         head_batchnorm: bool = False,
         head_batchnorm_last: bool = False,
         head_linear_first: bool = False,
+        head_activation_last: bool = False,
         pred_dim: int = 1,
     ):
         super(WideDeep, self).__init__()
@@ -154,6 +159,8 @@ class WideDeep(nn.Module):
         self.deeptext = deeptext
         self.deepimage = deepimage
         self.deephead = deephead
+        # checked when the loss function is applied
+        self.head_activation_last = head_activation_last

         if self.deeptabular is not None:
             self.is_tabnet = deeptabular.__class__.__name__ == "TabNet"
@@ -206,12 +213,15 @@ class WideDeep(nn.Module):
                 head_batchnorm_last,
                 head_linear_first,
             )
             self.deephead.add_module(
                 "head_out", nn.Linear(head_hidden_dims[-1], self.pred_dim)
             )
+            if self.head_activation_last:
+                self.deephead.add_module(
+                    "head_act", get_activation_fn(head_activation)
+                )

-    def _add_pred_layer(self):
+    def _add_pred_layer(self, head_activation):
         if self.deeptabular is not None:
             if self.is_tabnet:
                 self.deeptabular = nn.Sequential(
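`get_activation_fn`, newly imported above, resolves the `head_activation` string into a module that can be appended as the last element of the head. A rough sketch of the contract assumed here; the real helper lives in `pytorch_widedeep.models.tab_mlp` and supports more names:

```python
import torch.nn as nn

def get_activation_fn(activation: str) -> nn.Module:
    # illustrative subset of the name -> module mapping
    activations = {
        "relu": nn.ReLU(inplace=True),
        "leaky_relu": nn.LeakyReLU(inplace=True),
        "tanh": nn.Tanh(),
        "gelu": nn.GELU(),
    }
    return activations[activation]
```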
@@ -231,6 +241,10 @@ class WideDeep(nn.Module):
             self.deepimage = nn.Sequential(
                 self.deepimage, nn.Linear(self.deepimage.output_dim, self.pred_dim)
             )
+        if self.head_activation_last:
+            self.deephead.add_module(
+                "head_act", get_activation_fn(head_activation)
+            )

     def _forward_wide(self, X):
         if self.wide is not None:
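One caveat when combining the two changes in this commit: a `relu` head activation guarantees non-negative outputs but can emit exact zeros, while the Tweedie deviance needs strictly positive predictions, since `torch.pow(0.0, 1 - p)` is `inf` for `p > 1`. A small clamp before the loss, sketched below, avoids this:

```python
import torch

p = 1.5
torch.pow(torch.tensor(0.0), 1 - p)      # -> inf: breaks the loss
y_pred = torch.tensor([0.0, 0.3, 1.2])   # relu output may contain exact zeros
y_pred_safe = y_pred.clamp_min(1e-8)     # strictly positive, loss stays finite
```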