From 502de688ba5f767dc2ee9feb38a4639a1beb75da Mon Sep 17 00:00:00 2001
From: Javier
Date: Sun, 17 Mar 2019 19:45:38 +0000
Subject: [PATCH] adding function to use different optimizers for wide and deep parts

---
 main.py                  |   6 ++
 wide_deep/torch_model.py | 132 ++++++++++++++++++++++++++++++---------
 2 files changed, 110 insertions(+), 28 deletions(-)

diff --git a/main.py b/main.py
index 2566bfe..6c555b3 100644
--- a/main.py
+++ b/main.py
@@ -55,6 +55,12 @@ if __name__ == '__main__':
                      dropout,
                      encoding_dict,
                      n_class)
+    # if using multiple optimizers for the wide and deep sides:
+    # optimizer={'wide': ['name', lr, momentum], 'deep': ['name', lr, momentum]}
+    # for example:
+    # optimizer={'wide': ['SGD', 0.001, 0.1], 'deep': ['Adam', 0.001]}
+    # and
+    # model.compile(method=method, optimizer=optimizer)
     model.compile(method=method)
     if use_cuda:
         model = model.cuda()
diff --git a/wide_deep/torch_model.py b/wide_deep/torch_model.py
index 55881dc..3d6b85c 100644
--- a/wide_deep/torch_model.py
+++ b/wide_deep/torch_model.py
@@ -8,14 +8,35 @@ import torch.optim as optim
 from torch.autograd import Variable
 from torch.utils.data import Dataset, DataLoader
 
+
 use_cuda = torch.cuda.is_available()
 
 
+class MultipleOptimizer(object):
+    """Helper to use multiple optimizers as one.
+
+    Parameters:
+    ----------
+    opts: List
+        List with the optimizer instances to use
+    """
+    def __init__(self, opts):
+        self.optimizers = opts
+
+    def zero_grad(self):
+        for op in self.optimizers:
+            op.zero_grad()
+
+    def step(self):
+        for op in self.optimizers:
+            op.step()
+
+
 class WideDeepLoader(Dataset):
     """Helper to facilitate loading the data to the pytorch models.
 
     Parameters:
-    --------
+    ----------
     data: namedtuple with 3 elements - (wide_input_data, deep_inp_data, target)
     """
     def __init__(self, data):
@@ -45,16 +66,23 @@ class WideDeep(nn.Module):
 
     Parameters:
     --------
-    wide_dim (int) : dim of the wide-side input tensor
-    embeddings_input (tuple): 3-elements tuple with the embeddings "set-up" -
-        (col_name, unique_values, embeddings dim)
-    continuous_cols (list) : list with the name of the continuum columns
-    deep_column_idx (dict) : dictionary where the keys are column names and the values
-        their corresponding index in the deep-side input tensor
-    hidden_layers (list) : list with the number of units per hidden layer
-    encoding_dict (dict) : dictionary with the label-encode mapping
-    n_class (int) : number of classes. Defaults to 1 if logistic or regression
-    dropout (float)
+    wide_dim: Int
+        dim of the wide-side input tensor
+    embeddings_input: Tuple
+        3-element tuple with the embeddings "set-up" - (col_name,
+        unique_values, embeddings dim)
+    continuous_cols: List
+        list with the names of the continuous columns
+    deep_column_idx: Dict
+        dictionary where the keys are column names and the values their
+        corresponding index in the deep-side input tensor
+    hidden_layers: List
+        list with the number of units per hidden layer
+    encoding_dict: Dict
+        dictionary with the label-encode mapping
+    n_class: Int
+        number of classes. Defaults to 1 if logistic or regression
+    dropout: Float
     """
 
     def __init__(self,
@@ -95,29 +123,77 @@ class WideDeep(nn.Module):
 
         self.output = nn.Linear(self.hidden_layers[-1]+self.wide_dim, self.n_class)
 
-    def compile(self, method="logistic", optimizer="Adam", learning_rate=0.001, momentum=0.0):
-        """Wrapper to set the activation, loss and the optimizer.
-
-        Parameters:
-        ----------
-        method (str) : regression, logistic or multiclass
-        optimizer (str): SGD, Adam, or RMSprop
+    @staticmethod
+    def set_optimizer(model_params, optimizer, learning_rate, momentum=0.0):
         """
-        if method == 'regression':
-            self.activation, self.criterion = None, F.mse_loss
-        if method == 'logistic':
-            self.activation, self.criterion = torch.sigmoid, F.binary_cross_entropy
-        if method == 'multiclass':
-            self.activation, self.criterion = F.softmax, F.cross_entropy
+        Simple helper so we can set the optimizer with a string, which will
+        be convenient later. Add more optimizers/parameters if you need them.
+        """
         if optimizer == "Adam":
-            self.optimizer = torch.optim.Adam(self.parameters(), lr=learning_rate)
+            return torch.optim.Adam(model_params, lr=learning_rate)
+        if optimizer == "Adagrad":
+            return torch.optim.Adagrad(model_params, lr=learning_rate)
         if optimizer == "RMSprop":
-            self.optimizer = torch.optim.RMSprop(self.parameters(), lr=learning_rate)
+            return torch.optim.RMSprop(model_params, lr=learning_rate, momentum=momentum)
         if optimizer == "SGD":
-            self.optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate, momentum=momentum)
+            return torch.optim.SGD(model_params, lr=learning_rate, momentum=momentum)
+
+
+    @staticmethod
+    def set_method(method):
+        """
+        Simple helper so we can set the method with a string, which will
+        be convenient later.
+        """
+        if method == 'regression':
+            return None, F.mse_loss
+        if method == 'logistic':
+            return torch.sigmoid, F.binary_cross_entropy
+        if method == 'multiclass':
+            return F.softmax, F.cross_entropy
+
+
+    def compile(self, method="logistic", optimizer="Adam", learning_rate=0.001, momentum=0.0):
+        """Wrapper to set the activation, loss and the optimizer.
+        Parameters:
+        ----------
+        method: String
+            'regression', 'logistic' or 'multiclass'
+        optimizer: String or Dict
+            if String, one of the following: 'SGD', 'Adam', 'Adagrad' or 'RMSprop'
+            if Dict, it must contain two elements, one for the wide part and
+            one for the deep part, with keys 'wide' and 'deep'. E.g.
+            optimizer = {'wide': ['SGD', 0.001, 0.3], 'deep': ['Adam', 0.001]}
+        """
 
         self.method = method
+        self.activation, self.criterion = self.set_method(method)
+
+        if type(optimizer) is dict:
+            params = list(self.parameters())
+            # the last two sets of parameters are the weight matrix and the
+            # bias of the last linear layer
+            last_linear_weights = params[-2]
+            # by construction, the last wide_dim columns of that weight matrix
+            # correspond to the wide side and will use one optimizer
+            wide_params = [nn.Parameter(last_linear_weights[:, -self.wide_dim:])]
+            # The weights from the deep side and the bias will use the other
+            # optimizer
+            deep_weights = last_linear_weights[:, :-self.wide_dim]
+            deep_params = params[:-2] + [nn.Parameter(deep_weights)] + [params[-1]]
+            # Very inelegant, but will do for now
+            if len(optimizer['wide']) > 2:
+                wide_opt = self.set_optimizer(wide_params, optimizer['wide'][0], optimizer['wide'][1], optimizer['wide'][2])
+            else:
+                wide_opt = self.set_optimizer(wide_params, optimizer['wide'][0], optimizer['wide'][1])
+            if len(optimizer['deep']) > 2:
+                deep_opt = self.set_optimizer(deep_params, optimizer['deep'][0], optimizer['deep'][1], optimizer['deep'][2])
+            else:
+                deep_opt = self.set_optimizer(deep_params, optimizer['deep'][0], optimizer['deep'][1])
+            self.optimizer = MultipleOptimizer([wide_opt, deep_opt])
+        elif type(optimizer) is str:
+            self.optimizer = self.set_optimizer(self.parameters(), optimizer, learning_rate, momentum)
 
 
     def forward(self, X_w, X_d):
@@ -192,7 +268,7 @@ class WideDeep(nn.Module):
                     X_w, X_d, y = X_w.cuda(), X_d.cuda(), y.cuda()
 
                 self.optimizer.zero_grad()
-                y_pred = net(X_w, X_d) # [batch_size, 1]
+                y_pred = net(X_w, X_d)
                 loss = None
                 if(self.criterion == F.cross_entropy):
                     loss = self.criterion(y_pred, y) #[batch_size, 1]
-- 
GitLab
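
Below is a minimal usage sketch of the per-part optimizer option introduced by this patch, assuming a WideDeep model has already been built as in main.py; the method, learning rates and momentum values are illustrative placeholders rather than recommended settings.

    # single optimizer for the whole model, as before
    model.compile(method='logistic', optimizer='Adam', learning_rate=0.001)

    # one optimizer per part, new with this patch; each value is
    # [name, learning_rate] or [name, learning_rate, momentum]
    optimizer = {'wide': ['SGD', 0.001, 0.3], 'deep': ['Adam', 0.001]}
    model.compile(method='logistic', optimizer=optimizer)

In the dict case, compile builds one optimizer per part via set_optimizer and wraps both in MultipleOptimizer, so the existing training loop can keep calling self.optimizer.zero_grad() and self.optimizer.step() unchanged.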