From 502de688ba5f767dc2ee9feb38a4639a1beb75da Mon Sep 17 00:00:00 2001
From: Javier
Date: Sun, 17 Mar 2019 19:45:38 +0000
Subject: [PATCH] adding function to use different optimizers for wide and deep parts

---
 main.py                  |   6 ++
 wide_deep/torch_model.py | 132 ++++++++++++++++++++++++++++++---------
 2 files changed, 110 insertions(+), 28 deletions(-)

diff --git a/main.py b/main.py
index 2566bfe..6c555b3 100644
--- a/main.py
+++ b/main.py
@@ -55,6 +55,12 @@ if __name__ == '__main__':
                      dropout,
                      encoding_dict,
                      n_class)
+    # if using multiple optimizers for the wide and deep sides:
+    # optimizer={'wide': ['name', lr, momentum], 'deep': ['name', lr, momentum]}
+    # for example:
+    # optimizer={'wide': ['SGD', 0.001, 0.1], 'deep': ['Adam', 0.001]}
+    # and
+    # model.compile(method=method, optimizer=optimizer)
     model.compile(method=method)
     if use_cuda:
         model = model.cuda()
diff --git a/wide_deep/torch_model.py b/wide_deep/torch_model.py
index 55881dc..3d6b85c 100644
--- a/wide_deep/torch_model.py
+++ b/wide_deep/torch_model.py
@@ -8,14 +8,35 @@ import torch.optim as optim
 from torch.autograd import Variable
 from torch.utils.data import Dataset, DataLoader
 
+
 use_cuda = torch.cuda.is_available()
 
 
+class MultipleOptimizer(object):
+    """Helper to use multiple optimizers as one.
+
+    Parameters:
+    ----------
+    opts: List
+        List with the optimizer instances to use
+    """
+    def __init__(self, opts):
+        self.optimizers = opts
+
+    def zero_grad(self):
+        for op in self.optimizers:
+            op.zero_grad()
+
+    def step(self):
+        for op in self.optimizers:
+            op.step()
+
+
 class WideDeepLoader(Dataset):
     """Helper to facilitate loading the data to the pytorch models.
 
     Parameters:
-    --------
+    ----------
     data: namedtuple with 3 elements - (wide_input_data, deep_inp_data, target)
     """
     def __init__(self, data):
@@ -45,16 +66,23 @@ class WideDeep(nn.Module):
 
     Parameters:
     --------
-    wide_dim (int) : dim of the wide-side input tensor
-    embeddings_input (tuple): 3-elements tuple with the embeddings "set-up" -
-        (col_name, unique_values, embeddings dim)
-    continuous_cols (list) : list with the name of the continuum columns
-    deep_column_idx (dict) : dictionary where the keys are column names and the values
-        their corresponding index in the deep-side input tensor
-    hidden_layers (list) : list with the number of units per hidden layer
-    encoding_dict (dict) : dictionary with the label-encode mapping
-    n_class (int) : number of classes. Defaults to 1 if logistic or regression
-    dropout (float)
+    wide_dim: Int
+        dim of the wide-side input tensor
+    embeddings_input: Tuple
+        3-element tuple with the embeddings "set-up" - (col_name,
+        unique_values, embeddings dim)
+    continuous_cols: List
+        list with the names of the continuous columns
+    deep_column_idx: Dict
+        dictionary where the keys are column names and the values their
+        corresponding index in the deep-side input tensor
+    hidden_layers: List
+        list with the number of units per hidden layer
+    encoding_dict: Dict
+        dictionary with the label-encode mapping
+    n_class: Int
+        number of classes. Defaults to 1 if logistic or regression
+    dropout: Float
     """
 
     def __init__(self,
@@ -95,29 +123,77 @@ class WideDeep(nn.Module):
 
         self.output = nn.Linear(self.hidden_layers[-1]+self.wide_dim, self.n_class)
 
-    def compile(self, method="logistic", optimizer="Adam", learning_rate=0.001, momentum=0.0):
-        """Wrapper to set the activation, loss and the optimizer.
-
-        Parameters:
-        ----------
-        method (str) : regression, logistic or multiclass
-        optimizer (str): SGD, Adam, or RMSprop
+    @staticmethod
+    def set_optimizer(model_params, optimizer, learning_rate, momentum=0.0):
         """
-        if method == 'regression':
-            self.activation, self.criterion = None, F.mse_loss
-        if method == 'logistic':
-            self.activation, self.criterion = torch.sigmoid, F.binary_cross_entropy
-        if method == 'multiclass':
-            self.activation, self.criterion = F.softmax, F.cross_entropy
+        Simple helper so we can set the optimizer with a string, which will
+        be convenient later. Add more optimizers/parameters if you need them.
+        """
         if optimizer == "Adam":
-            self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
+            return torch.optim.Adam(model_params, lr=learning_rate)
+        if optimizer == "Adagrad":
+            return torch.optim.Adagrad(model_params, lr=learning_rate)
         if optimizer == "RMSprop":
-            self.optimizer = torch.optim.RMSprop(self.parameters(), lr=learning_rate)
+            return torch.optim.RMSprop(model_params, lr=learning_rate, momentum=momentum)
         if optimizer == "SGD":
-            self.optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate, momentum=momentum)
+            return torch.optim.SGD(model_params, lr=learning_rate, momentum=momentum)
+
+
+    @staticmethod
+    def set_method(method):
+        """
+        Simple helper so we can set the method with a string, which will
+        be convenient later.
+        """
+        if method == 'regression':
+            return None, F.mse_loss
+        if method == 'logistic':
+            return torch.sigmoid, F.binary_cross_entropy
+        if method == 'multiclass':
+            return F.softmax, F.cross_entropy
+
+
+    def compile(self, method="logistic", optimizer="Adam", learning_rate=0.001, momentum=0.0):
+        """Wrapper to set the activation, loss and the optimizer.
+        Parameters:
+        ----------
+        method: String
+            'regression', 'logistic' or 'multiclass'
+        optimizer: String or Dict
+            if String, one of the following: 'SGD', 'Adam', 'Adagrad' or 'RMSprop'
+            if Dict, it must contain two elements, one for the wide part and
+            one for the deep part, with keys 'wide' and 'deep'. E.g.
+            optimizer = {'wide': ['SGD', 0.001, 0.3], 'deep': ['Adam', 0.001]}
+        """
 
         self.method = method
+        self.activation, self.criterion = self.set_method(method)
+
+        if type(optimizer) is dict:
+            params = list(self.parameters())
+            # the last two sets of parameters are the weight matrix and the
+            # bias of the last linear layer
+            last_linear_weights = params[-2]
+            # by construction, the last wide_dim columns of that weight matrix
+            # correspond to the wide side and will use one optimizer
+            wide_params = [nn.Parameter(last_linear_weights[:, -self.wide_dim:])]
+            # The weights from the deep side and the bias will use the other
+            # optimizer
+            deep_weights = last_linear_weights[:, :-self.wide_dim]
+            deep_params = params[:-2] + [nn.Parameter(deep_weights)] + [params[-1]]
+            # Very inelegant, but will do for now
+            if len(optimizer['wide']) > 2:
+                wide_opt = self.set_optimizer(wide_params, optimizer['wide'][0], optimizer['wide'][1], optimizer['wide'][2])
+            else:
+                wide_opt = self.set_optimizer(wide_params, optimizer['wide'][0], optimizer['wide'][1])
+            if len(optimizer['deep']) > 2:
+                deep_opt = self.set_optimizer(deep_params, optimizer['deep'][0], optimizer['deep'][1], optimizer['deep'][2])
+            else:
+                deep_opt = self.set_optimizer(deep_params, optimizer['deep'][0], optimizer['deep'][1])
+            self.optimizer = MultipleOptimizer([wide_opt, deep_opt])
+        elif type(optimizer) is str:
+            self.optimizer = self.set_optimizer(self.parameters(), optimizer, learning_rate, momentum)
 
 
     def forward(self, X_w, X_d):
@@ -192,7 +268,7 @@ class WideDeep(nn.Module):
                     X_w, X_d, y = X_w.cuda(), X_d.cuda(), y.cuda()
 
                 self.optimizer.zero_grad()
-                y_pred = net(X_w, X_d) # [batch_size, 1]
+                y_pred = net(X_w, X_d)
                 loss = None
                 if(self.criterion == F.cross_entropy):
                     loss = self.criterion(y_pred, y) #[batch_size, 1]
-- 
GitLab
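
Below is a minimal usage sketch of the per-part optimizer option introduced by this patch, assuming a WideDeep model has already been built as in main.py; the method, learning rates and momentum values are illustrative placeholders rather than recommended settings.

    # single optimizer for the whole model, as before
    model.compile(method='logistic', optimizer='Adam', learning_rate=0.001)

    # one optimizer per part, new with this patch; each value is
    # [name, learning_rate] or [name, learning_rate, momentum]
    optimizer = {'wide': ['SGD', 0.001, 0.3], 'deep': ['Adam', 0.001]}
    model.compile(method='logistic', optimizer=optimizer)

In the dict case, compile builds one optimizer per part via set_optimizer and wraps both in MultipleOptimizer, so the existing training loop can keep calling self.optimizer.zero_grad() and self.optimizer.step() unchanged.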