diff --git a/examples/main_adult.py b/examples/main_adult.py index 85da74c8c0f3f718c6206868d181067772e85736..3a45bdc9367468fc963f6844ca4b0886d3651d87 100644 --- a/examples/main_adult.py +++ b/examples/main_adult.py @@ -3,11 +3,9 @@ import pandas as pd import torch from pathlib import Path -from pytorch_widedeep.preprocessing import WideProcessor, DeepProcessor +from pytorch_widedeep.preprocessing import WidePreprocessor, DeepPreprocessor from pytorch_widedeep.models import Wide, DeepDense, WideDeep from pytorch_widedeep.initializers import * -from pytorch_widedeep.optimizers import * -from pytorch_widedeep.lr_schedulers import * from pytorch_widedeep.callbacks import * from pytorch_widedeep.metrics import * @@ -34,9 +32,9 @@ if __name__ == '__main__': target = 'income_label' target = df[target].values - prepare_wide = WideProcessor(wide_cols=wide_cols, crossed_cols=crossed_cols) + prepare_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols) X_wide = prepare_wide.fit_transform(df) - prepare_deep = DeepProcessor(embed_cols=cat_embed_cols, continuous_cols=continuous_cols) + prepare_deep = DeepPreprocessor(embed_cols=cat_embed_cols, continuous_cols=continuous_cols) X_deep = prepare_deep.fit_transform(df) wide = Wide( @@ -51,11 +49,17 @@ if __name__ == '__main__': output_dim=1) model = WideDeep(wide=wide, deepdense=deepdense) - initializers = {'wide': KaimingNormal, 'deepdense':KaimingNormal} - optimizers = {'wide': RAdam, 'deepdense':RAdam } - schedulers = {'wide': StepLR(step_size=25), 'deepdense':StepLR(step_size=25)} + wide_opt = torch.optim.Adam(model.wide.parameters()) + deep_opt = torch.optim.Adam(model.deepdense.parameters()) - callbacks = [EarlyStopping, ModelCheckpoint(filepath='../model_weights/wd_out')] + wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3) + deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5) + + optimizers = {'wide': wide_opt, 'deepdense':deep_opt} + schedulers = {'wide': wide_sch, 'deepdense':deep_sch} + + initializers = {'wide': Normal, 'deepdense':Normal} + callbacks = [LRHistory, EarlyStopping, ModelCheckpoint(filepath='../model_weights/wd_out')] metrics = [BinaryAccuracy] model.compile( @@ -70,7 +74,7 @@ if __name__ == '__main__': X_wide=X_wide, X_deep=X_deep, target=target, - n_epochs=50, + n_epochs=10, batch_size=256, val_split=0.2) pdb.set_trace() diff --git a/examples/main_airbnb.py b/examples/main_airbnb.py index 221ed059ff27210c7a152ea31741ae968d07c260..3f18d9d22dd52b6ac9b58a30f18e06613456808c 100644 --- a/examples/main_airbnb.py +++ b/examples/main_airbnb.py @@ -9,8 +9,7 @@ from pytorch_widedeep.models import (Wide, DeepDense, DeepText, DeepImage, WideDeep) from pytorch_widedeep.initializers import * from pytorch_widedeep.callbacks import * -from pytorch_widedeep.optimizers import * -from pytorch_widedeep.lr_schedulers import * +from pytorch_widedeep.optim import RAdam import pdb @@ -69,10 +68,19 @@ if __name__ == '__main__': model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage) + wide_opt = torch.optim.Adam(model.wide.parameters()) + deep_opt = torch.optim.Adam(model.deepdense.parameters()) + text_opt = RAdam(model.deeptext.parameters()) + img_opt = RAdam(model.deepimage.parameters()) + + wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5) + deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3) + text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5) + img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=3) + + optimizers = 
{'wide': wide_opt, 'deepdense':deep_opt, 'deeptext':text_opt, 'deepimage': img_opt} + schedulers = {'wide': wide_sch, 'deepdense':deep_sch, 'deeptext':text_sch, 'deepimage': img_sch} initializers = {'wide': Normal, 'deepdense':Normal, 'deeptext':Normal, 'deepimage':Normal} - optimizers = {'wide': Adam, 'deepdense':Adam, 'deeptext':RAdam, 'deepimage':Adam} - schedulers = {'wide': StepLR(step_size=5), 'deepdense':StepLR(step_size=5), 'deeptext':MultiStepLR(milestones=[5,8]), - 'deepimage':MultiStepLR(milestones=[5,8])} mean = [0.406, 0.456, 0.485] #BGR std = [0.225, 0.224, 0.229] #BGR transforms = [ToTensor, Normalize(mean=mean, std=std)] diff --git a/pytorch_widedeep/callbacks.py b/pytorch_widedeep/callbacks.py index 920051b4518c5b543741822eef7334cf2bee868b..716ea603085e6e7f1c155ae0f7db2e2ecaabefa7 100644 --- a/pytorch_widedeep/callbacks.py +++ b/pytorch_widedeep/callbacks.py @@ -198,7 +198,7 @@ class ModelCheckpoint(Callback): self.epochs_since_last_save = 0 self.max_save = max_save - root_dir = filepath.split("/")[:-1][0] + root_dir = ('/').join(filepath.split("/")[:-1]) if not os.path.exists(root_dir): os.makedirs(root_dir) diff --git a/pytorch_widedeep/initializers.py b/pytorch_widedeep/initializers.py index 22cbb35f56ae9304d4db0573bcd9bb7e28bae3df..84e4b5ceef74e9cdb4668c363eb70351c03b3788 100644 --- a/pytorch_widedeep/initializers.py +++ b/pytorch_widedeep/initializers.py @@ -12,7 +12,7 @@ class Initializer(object): raise NotImplementedError('Initializer must implement this method') -class MultipleInitializers(object): +class MultipleInitializer(object): def __init__(self, initializers:Dict[str, Initializer], verbose=True): diff --git a/pytorch_widedeep/lr_schedulers.py b/pytorch_widedeep/lr_schedulers.py deleted file mode 100644 index 18d2627656de14be51268bd6fd2fcc27d65b1bfa..0000000000000000000000000000000000000000 --- a/pytorch_widedeep/lr_schedulers.py +++ /dev/null @@ -1,149 +0,0 @@ -import torch - -from torch import nn -from .wdtypes import * - - -class MultipleLRScheduler(object): - - def __init__(self,schedulers:Dict[str,LRScheduler]): - - instantiated_schedulers = {} - for model_name, scheduler in schedulers.items(): - if isinstance(scheduler, type): - instantiated_schedulers[model_name] = scheduler() - else: instantiated_schedulers[model_name] = scheduler - self._schedulers = instantiated_schedulers - - def apply(self, optimizers:Dict[str, Optimizer]): - for model_name, optimizer in optimizers.items(): - if model_name in self._schedulers: - self._schedulers[model_name] = self._schedulers[model_name](optimizer) - else: pass - - def step(self, loss=None): - for _, sc in self._schedulers.items(): - if 'ReduceLROnPlateau' == sc.__class__.__name__: sc.step(loss) - else: sc.step() - - -class StepLR: - - def __init__(self, step_size, gamma=0.1, last_epoch=-1): - - self.step_size = step_size - self.gamma = gamma - self.last_epoch = last_epoch - - def __call__(self, optimizer:Optimizer): - self.sch = torch.optim.lr_scheduler.StepLR(optimizer, step_size=self.step_size, gamma=self.gamma, - last_epoch=self.last_epoch) - return self.sch - - -class MultiStepLR: - - def __init__(self, milestones, gamma=0.1, last_epoch=-1): - - self.milestones = milestones - self.gamma = gamma - self.last_epoch = last_epoch - - def __call__(self, optimizer:Optimizer): - self.sch = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.milestones, gamma=self.gamma, - last_epoch=self.last_epoch) - return self.sch - - -class ExponentialLR: - - def __init__(self, gamma, last_epoch=-1): - - self.gamma = 
gamma - self.last_epoch = last_epoch - - def __call__(self, optimizer:Optimizer): - self.sch = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=self.gamma, - last_epoch=self.last_epoch) - return self.sch - - -class ReduceLROnPlateau: - - def __init__(self, mode='min', factor=0.1, patience=10, verbose=False, threshold=0.0001, - threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08): - - self.mode=mode - self.factor=factor - self.patience=patience - self.verbose=verbose - self.threshold=threshold - self.threshold_mode=threshold_mode - self.cooldown=cooldown - self.min_lr=min_lr - self.eps=eps - - def __call__(self, optimizer:Optimizer): - self.sch = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode=self.mode, factor=self.factor, - patience=self.patience, verbose=self.verbose, threshold=self.threshold, - threshold_mode=self.threshold, cooldown=self.cooldown, min_lr=self.min_lr, - eps=self.eps) - return self.sch - - -class CyclicLR: - - def __init__(self, base_lr, max_lr, step_size_up=2000, step_size_down=None, - mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', - cycle_momentum=True, base_momentum=0.8, max_momentum=0.9, last_epoch=-1): - - self.base_lr = base_lr - self.max_lr = max_lr - self.step_size_up = step_size_up - self.step_size_down = step_size_down - self.mode = mode - self.gamma = gamma - self.scale_fn = scale_fn - self.scale_mode = scale_mode - self.cycle_momentum = cycle_momentum - self.base_momentum = base_momentum - self.max_momentum = max_momentum - self.last_epoch = last_epoch - - def __call__(self, optimizer:Optimizer): - self.sch = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=self.base_lr, - max_lr=self.max_lr, step_size_up=self.step_size_up, step_size_down=self.step_size_down, - mode=self.mode, gamma=self.gamma, scale_fn=self.scale_fn, scale_mode=self.scale_mode, - cycle_momentum=self.cycle_momentum, base_momentum=self.base_momentum, - max_momentum=self.max_momentum, last_epoch=self.last_epoch) - return self.sch - - -class OneCycleLR: - - def __init__(self, max_lr, total_steps=None, epochs=None, steps_per_epoch=None, - pct_start=0.3, anneal_strategy='cos', cycle_momentum=True, base_momentum=0.85, - max_momentum=0.95, div_factor=25.0, final_div_factor=10000.0, last_epoch=-1): - - self.max_lr = max_lr - self.total_steps = total_steps - self.epochs = epochs - self.steps_per_epoch = steps_per_epoch - self.pct_start = pct_start - self.anneal_strategy = anneal_strategy - self.cycle_momentum = cycle_momentum - self.base_momentum = base_momentum - self.max_momentum = max_momentum - self.div_factor = div_factor - self.final_div_factor = final_div_factor - self.last_epoch = last_epoch - - def __call__(self, optimizer:Optimizer): - self.sch = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr = self.max_lr, - total_steps = self.total_steps, epochs = self.epochs, - steps_per_epoch = self.steps_per_epoch, pct_start = self.pct_start, - anneal_strategy = self.anneal_strategy, cycle_momentum = self.cycle_momentum, - base_momentum = self.base_momentum, max_momentum = self.max_momentum, - div_factor = self.div_factor, final_div_factor = self.final_div_factor, - last_epoch = self.last_epoch) - return self.sch diff --git a/pytorch_widedeep/models/__init__.py b/pytorch_widedeep/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b3f9fe22f222d5cb91d9dd6c4953a8c071d2ec2 --- /dev/null +++ b/pytorch_widedeep/models/__init__.py @@ -0,0 +1,6 @@ +from .wide import Wide +from .deep_dense import DeepDense +from .deep_text 
import DeepText +from .deep_image import DeepImage + +from .wide_deep import WideDeep \ No newline at end of file diff --git a/pytorch_widedeep/models/_multiple_lr_scheduler.py b/pytorch_widedeep/models/_multiple_lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..113957518205a035547cae359005ee67d0531cd8 --- /dev/null +++ b/pytorch_widedeep/models/_multiple_lr_scheduler.py @@ -0,0 +1,12 @@ +import torch + +from ..wdtypes import * + + +class MultipleLRScheduler(object): + def __init__(self, scheds:Dict[str,LRScheduler]): + self._schedulers = scheds + + def step(self): + for _, sc in self._schedulers.items(): + sc.step() diff --git a/pytorch_widedeep/models/_multiple_optimizer.py b/pytorch_widedeep/models/_multiple_optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..691d2296e209ca5952319c7734dce56255192f98 --- /dev/null +++ b/pytorch_widedeep/models/_multiple_optimizer.py @@ -0,0 +1,16 @@ +import torch + +from ..wdtypes import * + + +class MultipleOptimizer(object): + def __init__(self, opts:Dict[str,Optimizer]): + self._optimizers = opts + + def zero_grad(self): + for _, op in self._optimizers.items(): + op.zero_grad() + + def step(self): + for _, op in self._optimizers.items(): + op.step() \ No newline at end of file diff --git a/pytorch_widedeep/transforms.py b/pytorch_widedeep/models/_multiple_transforms.py similarity index 86% rename from pytorch_widedeep/transforms.py rename to pytorch_widedeep/models/_multiple_transforms.py index b06accc2c7a663ec40edfb454e02b1a698643828..a49b8b19622f212a2e8570165b74f3bfc4574655 100644 --- a/pytorch_widedeep/transforms.py +++ b/pytorch_widedeep/models/_multiple_transforms.py @@ -1,6 +1,6 @@ from torchvision.transforms import Compose -from .wdtypes import * +from ..wdtypes import * class MultipleTransforms(object): diff --git a/pytorch_widedeep/models/_wd_dataset.py b/pytorch_widedeep/models/_wd_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..4a9491be8b8cc10634c0bcd4c4d1ff2dd47516e6 --- /dev/null +++ b/pytorch_widedeep/models/_wd_dataset.py @@ -0,0 +1,47 @@ +import numpy as np +import torch + +from sklearn.utils import Bunch +from torch.utils.data import Dataset + +from ..wdtypes import * + +class WideDeepDataset(Dataset): + def __init__(self, X_wide:np.ndarray, X_deep:np.ndarray, + target:Optional[np.ndarray]=None, X_text:Optional[np.ndarray]=None, + X_img:Optional[np.ndarray]=None, transforms:Optional=None): + + self.X_wide = X_wide + self.X_deep = X_deep + self.X_text = X_text + self.X_img = X_img + self.transforms = transforms + if self.transforms: + self.transforms_names = [tr.__class__.__name__ for tr in self.transforms.transforms] + else: self.transforms_names = [] + self.Y = target + + def __getitem__(self, idx:int): + + X = Bunch(wide=self.X_wide[idx]) + X.deepdense= self.X_deep[idx] + if self.X_text is not None: + X.deeptext = self.X_text[idx] + if self.X_img is not None: + xdi = self.X_img[idx] + if 'int' in str(xdi.dtype) and 'uint8' != str(xdi.dtype): xdi = xdi.astype('uint8') + if 'float' in str(xdi.dtype) and 'float32' != str(xdi.dtype): xdi = xdi.astype('float32') + if not self.transforms or 'ToTensor' not in self.transforms_names: + xdi = xdi.transpose(2,0,1) + if 'int' in str(xdi.dtype): xdi = (xdi/xdi.max()).astype('float32') + if 'ToTensor' in self.transforms_names: xdi = self.transforms(xdi) + elif self.transforms: xdi = self.transforms(torch.Tensor(xdi)) + X.deepimage = xdi + if self.Y is not None: + y = self.Y[idx] + return X, y + else: 
+ return X + + def __len__(self): + return len(self.X_wide) diff --git a/pytorch_widedeep/models/deep_dense.py b/pytorch_widedeep/models/deep_dense.py new file mode 100644 index 0000000000000000000000000000000000000000..7f76ae6604e2c31c943703152c241ace73975294 --- /dev/null +++ b/pytorch_widedeep/models/deep_dense.py @@ -0,0 +1,79 @@ +import numpy as np +import torch + +from torch import nn +from ..wdtypes import * + + +def dense_layer(inp:int, out:int, dropout:float, batchnorm=False): + if batchnorm: + return nn.Sequential( + nn.Linear(inp, out), + nn.BatchNorm1d(out), + nn.LeakyReLU(inplace=True), + nn.Dropout(dropout) + ) + else: + return nn.Sequential( + nn.Linear(inp, out), + nn.LeakyReLU(inplace=True), + nn.Dropout(dropout) + ) + + +class DeepDense(nn.Module): + def __init__(self, + deep_column_idx:Dict[str,int], + hidden_layers:List[int], + dropout:List[float]=0., + embed_input:Optional[List[Tuple[str,int,int]]]=None, + continuous_cols:Optional[List[str]]=None, + batchnorm:bool=False, + output_dim:int=1): + + super(DeepDense, self).__init__() + + self.embed_input = embed_input + self.continuous_cols = continuous_cols + self.deep_column_idx = deep_column_idx + + # Embeddings + if self.embed_input is not None: + self.embed_layers = nn.ModuleDict({'emb_layer_'+col: nn.Embedding(val, dim) + for col, val, dim in self.embed_input}) + emb_inp_dim = np.sum([embed[2] for embed in self.embed_input]) + else: + emb_inp_dim = 0 + + # Continuous + if self.continuous_cols is not None: cont_inp_dim = len(self.continuous_cols) + else: cont_inp_dim = 0 + + # Dense Layers + input_dim = emb_inp_dim + cont_inp_dim + hidden_layers = [input_dim] + hidden_layers + dropout = [0.0] + dropout + self.dense = nn.Sequential() + for i in range(1, len(hidden_layers)): + self.dense.add_module( + 'dense_layer_{}'.format(i-1), + dense_layer( hidden_layers[i-1], hidden_layers[i], dropout[i-1], batchnorm)) + + # Last Linear (Deep Dense Linear ddlinear) + self.dense.add_module('ddlinear', nn.Linear(hidden_layers[-1], output_dim)) + + def forward(self, X:Tensor)->Tensor: + if self.embed_input is not None: + embed = [self.embed_layers['emb_layer_'+col](X[:,self.deep_column_idx[col]].long()) + for col,_,_ in self.embed_input] + if self.continuous_cols is not None: + cont_idx = [self.deep_column_idx[col] for col in self.continuous_cols] + cont = X[:, cont_idx].float() + try: + out = self.dense(torch.cat(embed+[cont], 1)) + except: + try: + out = self.dense(torch.cat(embed, 1)) + except: + out = self.dense(cont) + return out diff --git a/pytorch_widedeep/models/deep_image.py b/pytorch_widedeep/models/deep_image.py new file mode 100644 index 0000000000000000000000000000000000000000..d3d82cf3e3ad297728c274937b5577b782e6fa70 --- /dev/null +++ b/pytorch_widedeep/models/deep_image.py @@ -0,0 +1,87 @@ +import torch + +from ..wdtypes import * + +from torch import nn +from torchvision import models + + +def conv_layer(ni:int, nf:int, ks:int=3, stride:int=1, maxpool:bool=True, + adaptiveavgpool:bool=False): + layer = nn.Sequential( + nn.Conv2d(ni, nf, kernel_size=ks, bias=True, stride=stride, padding=ks//2), + nn.BatchNorm2d(nf, momentum=0.01), + nn.LeakyReLU(negative_slope=0.1, inplace=True)) + if maxpool: layer.add_module('maxpool', nn.MaxPool2d(2, 2)) + if adaptiveavgpool: layer.add_module('adaptiveavgpool', nn.AdaptiveAvgPool2d(output_size=(1, 1))) + return layer + + +class DeepImage(nn.Module): + + def __init__(self, + output_dim:int=1, + pretrained:bool=True, + resnet=18, + freeze:Union[str,int]=6): + super(DeepImage, 
self).__init__() + """ + Standard image classifier/regressor using a pretrained network + freezing some of the first layers (or all layers). + + I use Resnets which have 9 "components" before the last dense layers. + The first 4 are: conv->batchnorm->relu->maxpool. + + After that we have 4 additional 'layers' (so 4+4=8) comprised by a + series of convolutions and then the final AdaptiveAvgPool2d (8+1=9). + + The parameter freeze sets the last layer to be frozen. For example, + freeze=6 will freeze all but the last 2 Layers and AdaptiveAvgPool2d + layer. If freeze='all' it freezes the entire network. + """ + if pretrained: + if resnet==18: + vision_model = models.resnet18(pretrained=True) + elif resnet==34: + vision_model = models.resnet34(pretrained=True) + elif resnet==50: + vision_model = models.resnet50(pretrained=True) + + backbone_layers = list(vision_model.children())[:-1] + + if isinstance(freeze, str): + frozen_layers = [] + for layer in backbone_layers: + for param in layer.parameters(): + param.requires_grad = False + frozen_layers.append(layer) + self.backbone = nn.Sequential(*frozen_layers) + if isinstance(freeze, int): + assert freeze < 8, 'freeze must be less than 8 when using resnet architectures' + frozen_layers = [] + trainable_layers = backbone_layers[freeze:] + for layer in backbone_layers[:freeze]: + for param in layer.parameters(): + param.requires_grad = False + frozen_layers.append(layer) + + backbone_layers = frozen_layers + trainable_layers + self.backbone = nn.Sequential(*backbone_layers) + else: + self.backbone = nn.Sequential( + conv_layer(3, 64, 3), + conv_layer(64, 128, 1, maxpool=False), + conv_layer(128, 256, 1, maxpool=False), + conv_layer(256, 512, 1, maxpool=False, adaptiveavgpool=True), + ) + self.dilinear = nn.Sequential( + nn.Linear(512, 256), + nn.Linear(256, 128), + nn.Linear(128, output_dim) + ) + + def forward(self, x:Tensor)->Tensor: + x = self.backbone(x) + x = x.view(x.size(0), -1) + out = self.dilinear(x) + return out diff --git a/pytorch_widedeep/models/deep_text.py b/pytorch_widedeep/models/deep_text.py new file mode 100644 index 0000000000000000000000000000000000000000..7a96ff1694ad8a144d7286bebcdcb9c1713d39a1 --- /dev/null +++ b/pytorch_widedeep/models/deep_text.py @@ -0,0 +1,73 @@ +import numpy as np +import torch +import warnings + +from torch import nn +from ..wdtypes import * + + +class DeepText(nn.Module): + def __init__(self, + vocab_size:int, + embed_dim:Optional[int]=None, + hidden_dim:int=64, + n_layers:int=3, + rnn_dropout:float=0., + spatial_dropout:float=0., + padding_idx:int=1, + output_dim:int=1, + bidirectional:bool=False, + embedding_matrix:Optional[np.ndarray]=None): + super(DeepText, self).__init__() + """ + Standard Text Classifier/Regressor with a stack of RNNs. + """ + + if embed_dim is not None and embedding_matrix is not None and not embed_dim==embedding_matrix.shape[1]: + warnings.warn( + 'the input embedding dimension {} and the dimension of the ' + 'pretrained embeddings {} do not match. 
The pretrained embeddings ' + 'dimension ({}) will be used'.format(embed_dim, embedding_matrix.shape[1], + embedding_matrix.shape[1]), UserWarning) + + self.bidirectional = bidirectional + self.spatial_dropout = spatial_dropout + self.word_embed_dropout = nn.Dropout2d(spatial_dropout) + + # Pre-trained Embeddings + if isinstance(embedding_matrix, np.ndarray): + self.word_embed = nn.Embedding(vocab_size, embedding_matrix.shape[1], padding_idx = padding_idx) + self.word_embed.weight = nn.Parameter(torch.Tensor(embedding_matrix)) + embed_dim = embedding_matrix.shape[1] + else: + self.word_embed = nn.Embedding(vocab_size, embed_dim, padding_idx = padding_idx) + + # stack of GRUs + self.rnn = nn.GRU(embed_dim, + hidden_dim, + num_layers=n_layers, + bidirectional=bidirectional, + dropout=rnn_dropout, + batch_first=True) + input_dim = hidden_dim*2 if bidirectional else hidden_dim + + # Deep Text Linear (dtlinear) + self.dtlinear = nn.Linear(input_dim, output_dim) + + def forward(self, X:Tensor)->Tensor: + + embed = self.word_embed(X.long()) + # Spatial dropout: dropping an entire channel (word-vector dimension) + if self.spatial_dropout > 0.: + sd_embed = embed.unsqueeze(2) + sd_embed = sd_embed.permute(0, 3, 2, 1) + sd_embed = self.word_embed_dropout(sd_embed) + sd_embed = sd_embed.permute(0, 3, 2, 1) + embed = sd_embed.squeeze(2) + o, h = self.rnn(embed) + if self.bidirectional: + last_h = torch.cat((h[-2], h[-1]), dim = 1) + else: + last_h = h[-1] + out = self.dtlinear(last_h) + return out diff --git a/pytorch_widedeep/models/wide.py b/pytorch_widedeep/models/wide.py new file mode 100644 index 0000000000000000000000000000000000000000..64bd513835e59fe043088ea9d6ee343648a19fc1 --- /dev/null +++ b/pytorch_widedeep/models/wide.py @@ -0,0 +1,14 @@ +import torch + +from torch import nn +from ..wdtypes import * + +class Wide(nn.Module): + def __init__(self,wide_dim:int, output_dim:int=1): + super(Wide, self).__init__() + # (Wide Linear, wlinear) + self.wlinear = nn.Linear(wide_dim, output_dim) + + def forward(self, X:Tensor)->Tensor: + out = self.wlinear(X.float()) + return out diff --git a/pytorch_widedeep/models.py b/pytorch_widedeep/models/wide_deep.py similarity index 56% rename from pytorch_widedeep/models.py rename to pytorch_widedeep/models/wide_deep.py index 50de8addac5947c6b07ecf395d47acd77927e6eb..bd0cda8bda4dcf49a3e92ddaab1b8514fe29cd45 100644 --- a/pytorch_widedeep/models.py +++ b/pytorch_widedeep/models/wide_deep.py @@ -4,300 +4,26 @@ import torch import torch.nn as nn import torch.nn.functional as F -from .wdtypes import * -from .initializers import Initializer, MultipleInitializers -from .optimizers import MultipleOptimizers -from .lr_schedulers import MultipleLRScheduler -from .callbacks import Callback, History, CallbackContainer -from .metrics import Metric, MultipleMetrics, MetricCallback -from .transforms import MultipleTransforms -from .losses import FocalLoss +from ..wdtypes import * + +from ..initializers import Initializer, MultipleInitializer +from ..callbacks import Callback, History, CallbackContainer +from ..metrics import Metric, MultipleMetrics, MetricCallback +from ..losses import FocalLoss + +from ._wd_dataset import WideDeepDataset +from ._multiple_optimizer import MultipleOptimizer +from ._multiple_lr_scheduler import MultipleLRScheduler +from ._multiple_transforms import MultipleTransforms from tqdm import tqdm,trange -from sklearn.utils import Bunch from sklearn.model_selection import train_test_split -from torchvision import models -from torch.utils.data import 
Dataset, DataLoader +from torch.utils.data import DataLoader -from copy import deepcopy use_cuda = torch.cuda.is_available() -def dense_layer(inp:int, out:int, dropout:float, batchnorm=False): - if batchnorm: - return nn.Sequential( - nn.Linear(inp, out), - nn.BatchNorm1d(out), - nn.LeakyReLU(inplace=True), - nn.Dropout(dropout) - ) - else: - return nn.Sequential( - nn.Linear(inp, out), - nn.LeakyReLU(inplace=True), - nn.Dropout(dropout) - ) - - -def conv_layer(ni:int, nf:int, ks:int=3, stride:int=1, maxpool:bool=True, - adaptiveavgpool:bool=False): - layer = nn.Sequential( - nn.Conv2d(ni, nf, kernel_size=ks, bias=True, stride=stride, padding=ks//2), - nn.BatchNorm2d(nf, momentum=0.01), - nn.LeakyReLU(negative_slope=0.1, inplace=True)) - if maxpool: layer.add_module('maxpool', nn.MaxPool2d(2, 2)) - if adaptiveavgpool: layer.add_module('adaptiveavgpool', nn.AdaptiveAvgPool2d(output_size=(1, 1))) - return layer - - -class Wide(nn.Module): - def __init__(self,wide_dim:int, output_dim:int=1): - super(Wide, self).__init__() - # (Wide Linear, wlinear) - self.wlinear = nn.Linear(wide_dim, output_dim) - - def forward(self, X:Tensor)->Tensor: - out = self.wlinear(X.float()) - return out - - -class DeepDense(nn.Module): - def __init__(self, - deep_column_idx:Dict[str,int], - hidden_layers:List[int], - dropout:List[float]=0., - embed_input:Optional[List[Tuple[str,int,int]]]=None, - continuous_cols:Optional[List[str]]=None, - batchnorm:bool=False, - output_dim:int=1): - - super(DeepDense, self).__init__() - - self.embed_input = embed_input - self.continuous_cols = continuous_cols - self.deep_column_idx = deep_column_idx - - # Embeddings - if self.embed_input is not None: - self.embed_layers = nn.ModuleDict({'emb_layer_'+col: nn.Embedding(val, dim) - for col, val, dim in self.embed_input}) - emb_inp_dim = np.sum([embed[2] for embed in self.embed_input]) - else: - emb_inp_dim = 0 - - # Continuous - if self.continuous_cols is not None: cont_inp_dim = len(self.continuous_cols) - else: cont_inp_dim = 0 - - # Dense Layers - input_dim = emb_inp_dim + cont_inp_dim - hidden_layers = [input_dim] + hidden_layers - dropout = [0.0] + dropout - self.dense = nn.Sequential() - for i in range(1, len(hidden_layers)): - self.dense.add_module( - 'dense_layer_{}'.format(i-1), - dense_layer( hidden_layers[i-1], hidden_layers[i], dropout[i-1], batchnorm)) - - # Last Linear (Deep Dense Linear ddlinear) - self.dense.add_module('ddlinear', nn.Linear(hidden_layers[-1], output_dim)) - - def forward(self, X:Tensor)->Tensor: - if self.embed_input is not None: - embed = [self.embed_layers['emb_layer_'+col](X[:,self.deep_column_idx[col]].long()) - for col,_,_ in self.embed_input] - if self.continuous_cols is not None: - cont_idx = [self.deep_column_idx[col] for col in self.continuous_cols] - cont = X[:, cont_idx].float() - try: - out = self.dense(torch.cat(embed+[cont], 1)) - except: - try: - out = self.dense(torch.cat(embed, 1)) - except: - out = self.dense(cont) - return out - - -class DeepText(nn.Module): - def __init__(self, - vocab_size:int, - embed_dim:Optional[int]=None, - hidden_dim:int=64, - n_layers:int=3, - rnn_dropout:float=0., - spatial_dropout:float=0., - padding_idx:int=1, - output_dim:int=1, - bidirectional:bool=False, - embedding_matrix:Optional[np.ndarray]=None): - super(DeepText, self).__init__() - """ - Standard Text Classifier/Regressor with a stack of RNNs. 
- """ - - if embed_dim is not None and embedding_matrix is not None and not embed_dim==embedding_matrix.shape[1]: - warnings.warn( - 'the input embedding dimension {} and the dimension of the ' - 'pretrained embeddings {} do not match. The pretrained embeddings ' - 'dimension ({}) will be used'.format(embed_dim, embedding_matrix.shape[1], - embedding_matrix.shape[1]), UserWarning) - - self.bidirectional = bidirectional - self.spatial_dropout = spatial_dropout - self.word_embed_dropout = nn.Dropout2d(spatial_dropout) - - # Pre-trained Embeddings - if isinstance(embedding_matrix, np.ndarray): - self.word_embed = nn.Embedding(vocab_size, embedding_matrix.shape[1], padding_idx = padding_idx) - self.word_embed.weight = nn.Parameter(torch.Tensor(embedding_matrix)) - embed_dim = embedding_matrix.shape[1] - else: - self.word_embed = nn.Embedding(vocab_size, embed_dim, padding_idx = padding_idx) - - # stack of GRUs - self.rnn = nn.GRU(embed_dim, - hidden_dim, - num_layers=n_layers, - bidirectional=bidirectional, - dropout=rnn_dropout, - batch_first=True) - input_dim = hidden_dim*2 if bidirectional else hidden_dim - - # Deep Text Linear (dtlinear) - self.dtlinear = nn.Linear(input_dim, output_dim) - - def forward(self, X:Tensor)->Tensor: - - embed = self.word_embed(X.long()) - # Spatial dropout: dropping an entire channel (word-vector dimension) - if self.spatial_dropout > 0.: - sd_embed = embed.unsqueeze(2) - sd_embed = sd_embed.permute(0, 3, 2, 1) - sd_embed = self.word_embed_dropout(sd_embed) - sd_embed = sd_embed.permute(0, 3, 2, 1) - embed = sd_embed.squeeze(2) - o, h = self.rnn(embed) - if self.bidirectional: - last_h = torch.cat((h[-2], h[-1]), dim = 1) - else: - last_h = h[-1] - out = self.dtlinear(last_h) - return out - - -class DeepImage(nn.Module): - - def __init__(self, - output_dim:int=1, - pretrained:bool=True, - resnet=18, - freeze:Union[str,int]=6): - super(DeepImage, self).__init__() - """ - Standard image classifier/regressor using a pretrained network - freezing some of the first layers (or all layers). - - I use Resnets which have 9 "components" before the last dense layers. - The first 4 are: conv->batchnorm->relu->maxpool. - - After that we have 4 additional 'layers' (so 4+4=8) comprised by a - series of convolutions and then the final AdaptiveAvgPool2d (8+1=9). - - The parameter freeze sets the last layer to be frozen. For example, - freeze=6 will freeze all but the last 2 Layers and AdaptiveAvgPool2d - layer. If freeze='all' it freezes the entire network. 
- """ - if pretrained: - if resnet==18: - vision_model = models.resnet18(pretrained=True) - elif resnet==34: - vision_model = models.resnet34(pretrained=True) - elif resnet==50: - vision_model = models.resnet50(pretrained=True) - - backbone_layers = list(vision_model.children())[:-1] - - if isinstance(freeze, str): - frozen_layers = [] - for layer in backbone_layers: - for param in layer.parameters(): - param.requires_grad = False - frozen_layers.append(layer) - self.backbone = nn.Sequential(*frozen_layers) - if isinstance(freeze, int): - assert freeze < 8, 'freeze must be less than 8 when using resnet architectures' - frozen_layers = [] - trainable_layers = backbone_layers[freeze:] - for layer in backbone_layers[:freeze]: - for param in layer.parameters(): - param.requires_grad = False - frozen_layers.append(layer) - - backbone_layers = frozen_layers + trainable_layers - self.backbone = nn.Sequential(*backbone_layers) - else: - self.backbone = nn.Sequential( - conv_layer(3, 64, 3), - conv_layer(64, 128, 1, maxpool=False), - conv_layer(128, 256, 1, maxpool=False), - conv_layer(256, 512, 1, maxpool=False, adaptiveavgpool=True), - ) - self.dilinear = nn.Sequential( - nn.Linear(512, 256), - nn.Linear(256, 128), - nn.Linear(128, output_dim) - ) - - def forward(self, x:Tensor)->Tensor: - x = self.backbone(x) - x = x.view(x.size(0), -1) - out = self.dilinear(x) - return out - - -class WideDeepLoader(Dataset): - def __init__(self, X_wide:np.ndarray, X_deep:np.ndarray, - target:Optional[np.ndarray]=None, X_text:Optional[np.ndarray]=None, - X_img:Optional[np.ndarray]=None, transforms:Optional=None): - - self.X_wide = X_wide - self.X_deep = X_deep - self.X_text = X_text - self.X_img = X_img - self.transforms = transforms - if self.transforms: - self.transforms_names = [tr.__class__.__name__ for tr in self.transforms.transforms] - else: self.transforms_names = [] - self.Y = target - - def __getitem__(self, idx:int): - - X = Bunch(wide=self.X_wide[idx]) - X.deepdense= self.X_deep[idx] - if self.X_text is not None: - X.deeptext = self.X_text[idx] - if self.X_img is not None: - xdi = self.X_img[idx] - if 'int' in str(xdi.dtype) and 'uint8' != str(xdi.dtype): xdi = xdi.astype('uint8') - if 'float' in str(xdi.dtype) and 'float32' != str(xdi.dtype): xdi = xdi.astype('float32') - if not self.transforms or 'ToTensor' not in self.transforms_names: - xdi = xdi.transpose(2,0,1) - if 'int' in str(xdi.dtype): xdi = (xdi/xdi.max()).astype('float32') - if 'ToTensor' in self.transforms_names: xdi = self.transforms(xdi) - elif self.transforms: xdi = self.transforms(torch.Tensor(xdi)) - X.deepimage = xdi - if self.Y is not None: - y = self.Y[idx] - return X, y - else: - return X - - def __len__(self): - return len(self.X_wide) - - class WideDeep(nn.Module): def __init__(self, @@ -325,7 +51,7 @@ class WideDeep(nn.Module): initializers:Optional[Dict[str,Initializer]]=None, optimizers:Optional[Dict[str,Optimizer]]=None, global_optimizer:Optional[Optimizer]=None, - param_groups:Optional[Union[List[Dict],Dict[str,List[Dict]]]]=None, + # param_groups:Optional[Union[List[Dict],Dict[str,List[Dict]]]]=None, lr_schedulers:Optional[Dict[str,LRScheduler]]=None, global_lr_scheduler:Optional[LRScheduler]=None, transforms:Optional[List[Transforms]]=None, @@ -350,32 +76,22 @@ class WideDeep(nn.Module): self.class_weight = None if initializers is not None: - self.initializer = MultipleInitializers(initializers, verbose=self.verbose) + self.initializer = MultipleInitializer(initializers, verbose=self.verbose) 
self.initializer.apply(self) if optimizers is not None: - self.optimizer = MultipleOptimizers(optimizers) - self.optimizer.apply(self, param_groups) + self.optimizer = MultipleOptimizer(optimizers) elif global_optimizer is not None: - if isinstance(global_optimizer, type): global_optimizer = global_optimizer() - self.optimizer = global_optimizer(self, param_groups) + self.optimizer = global_optimizer else: self.optimizer = torch.optim.Adam(self.parameters()) if lr_schedulers is not None: self.lr_scheduler = MultipleLRScheduler(lr_schedulers) - self.lr_scheduler.apply(self.optimizer._optimizers) scheduler_names = [sc.__class__.__name__.lower() for _,sc in self.lr_scheduler._schedulers.items()] self.cyclic = any(['cycl' in sn for sn in scheduler_names]) elif global_lr_scheduler is not None: - if isinstance(global_lr_scheduler, type): global_lr_scheduler = global_lr_scheduler() - try: self.lr_scheduler = global_lr_scheduler(self.optimizer) - except: - raise TypeError( - "{} is not an Optimizer. If a global learning rate scheduler " - "is used then a single global optimizer must also be used".format( - type(self.optimizer).__name__) - ) + self.lr_scheduler = global_lr_scheduler self.cyclic = 'cycl' in self.lr_scheduler.__class__.__name__.lower() else: self.lr_scheduler, self.cyclic = None, False @@ -494,7 +210,7 @@ class WideDeep(nn.Module): except: pass try: X_train.update({'X_img': X_img}) except: pass - train_set = WideDeepLoader(**X_train, transforms=self.transforms) + train_set = WideDeepDataset(**X_train, transforms=self.transforms) eval_set = None else: if X_val is not None: @@ -520,8 +236,8 @@ class WideDeep(nn.Module): X_tr_img, X_val_img = train_test_split(X_img, test_size=val_split, random_state=seed) X_train.update({'X_img': X_tr_img}), X_val.update({'X_img': X_val_img}) except: pass - train_set = WideDeepLoader(**X_train, transforms=self.transforms) - eval_set = WideDeepLoader(**X_val, transforms=self.transforms) + train_set = WideDeepDataset(**X_train, transforms=self.transforms) + eval_set = WideDeepDataset(**X_val, transforms=self.transforms) return train_set, eval_set def fit(self, X_wide:Optional[np.ndarray]=None, X_deep:Optional[np.ndarray]=None, @@ -590,12 +306,12 @@ class WideDeep(nn.Module): X_img:Optional[np.ndarray]=None, X_test:Optional[Dict[str, np.ndarray]]=None)->np.ndarray: if X_test is not None: - test_set = WideDeepLoader(**X_test) + test_set = WideDeepDataset(**X_test) else: load_dict = {'X_wide': X_wide, 'X_deep': X_deep} if X_text is not None: load_dict.update({'X_text': X_text}) if X_img is not None: load_dict.update({'X_img': X_img}) - test_set = WideDeepLoader(**load_dict) + test_set = WideDeepDataset(**load_dict) test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=self.batch_size,shuffle=False) @@ -622,12 +338,12 @@ class WideDeep(nn.Module): X_img:Optional[np.ndarray]=None, X_test:Optional[Dict[str, np.ndarray]]=None)->np.ndarray: if X_test is not None: - test_set = WideDeepLoader(**X_test) + test_set = WideDeepDataset(**X_test) else: load_dict = {'X_wide': X_wide, 'X_deep': X_deep} if X_text is not None: load_dict.update({'X_text': X_text}) if X_img is not None: load_dict.update({'X_img': X_img}) - test_set = WideDeepLoader(**load_dict) + test_set = WideDeepDataset(**load_dict) test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=self.batch_size,shuffle=False) diff --git a/pytorch_widedeep/optim/__init__.py b/pytorch_widedeep/optim/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..7497e8fdc17210d18c96bb29b0746d551d2c1296 --- /dev/null +++ b/pytorch_widedeep/optim/__init__.py @@ -0,0 +1 @@ +from .radam import RAdam diff --git a/pytorch_widedeep/radam.py b/pytorch_widedeep/optim/radam.py similarity index 100% rename from pytorch_widedeep/radam.py rename to pytorch_widedeep/optim/radam.py diff --git a/pytorch_widedeep/optimizers.py b/pytorch_widedeep/optimizers.py deleted file mode 100644 index 355f43d744a84cd06f639b7c8c8fe5b8d03f58ad..0000000000000000000000000000000000000000 --- a/pytorch_widedeep/optimizers.py +++ /dev/null @@ -1,121 +0,0 @@ -import torch -import warnings - -from torch import nn -from .radam import RAdam as orgRAdam -from .wdtypes import * - -import pdb - -class MultipleOptimizers(object): - - def __init__(self, optimizers:Dict[str,Optimizer]): - - instantiated_optimizers = {} - for model_name, optimizer in optimizers.items(): - if isinstance(optimizer, type): - instantiated_optimizers[model_name] = optimizer() - else: instantiated_optimizers[model_name] = optimizer - self._optimizers = instantiated_optimizers - - def apply(self, model:nn.Module, param_group=None): - - children = list(model.children()) - children_names = [child.__class__.__name__.lower() for child in children] - - if not all([cn in children_names for cn in self._optimizers.keys()]): - raise ValueError('Model name has to be one of: {}'.format(children_names)) - - for child, name in zip(children, children_names): - if name in self._optimizers and param_group is not None and name in param_group: - self._optimizers[name] = self._optimizers[name](child, param_group[name]) - elif name in self._optimizers: - self._optimizers[name] = self._optimizers[name](child) - else: - warnings.warn( - "No optimizer found for {}. 
Adam optimizer with default " - "settings will be used".format(name)) - self._optimizers[name] = Adam()(child) - - def zero_grad(self): - for _, opt in self._optimizers.items(): - opt.zero_grad() - - def step(self): - for _, opt in self._optimizers.items(): - opt.step() - - -class Adam: - - def __init__(self, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, - amsgrad=False): - - self.lr=lr - self.betas=betas - self.eps=eps - self.weight_decay=weight_decay - self.amsgrad=amsgrad - - def __call__(self, submodel:nn.Module, param_group=None) -> Optimizer: - if param_group is not None: params = param_group - else: params = submodel.parameters() - self.opt = torch.optim.Adam(params, lr=self.lr, betas=self.betas, eps=self.eps, - weight_decay=self.weight_decay, amsgrad=self.amsgrad) - return self.opt - - -class RAdam: - - def __init__(self, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): - - self.lr=lr - self.betas=betas - self.eps=eps - self.weight_decay=weight_decay - - def __call__(self, submodel:nn.Module, param_group=None) -> Optimizer: - if param_group is not None: params = param_group - else: params = submodel.parameters() - self.opt = orgRAdam(submodel.parameters(), lr=self.lr, betas=self.betas, eps=self.eps, - weight_decay=self.weight_decay) - return self.opt - - -class SGD: - - def __init__(self, lr=1e-3, momentum=0, dampening=0, weight_decay=0, nesterov=False): - - self.lr=lr - self.momentum=momentum - self.dampening=dampening - self.weight_decay=weight_decay - self.nesterov=nesterov - - def __call__(self, submodel:nn.Module, param_group=None) -> Optimizer: - if param_group is not None: params = param_group - else: params = submodel.parameters() - self.opt = torch.optim.SGD(submodel.parameters(), lr=self.lr, momentum=self.momentum, - dampening=self.dampening, weight_decay=self.weight_decay, nesterov=self.nesterov) - return self.opt - - -class RMSprop: - - def __init__(self, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False): - - self.lr = lr - self.alpha = alpha - self.eps = eps - self.weight_decay = weight_decay - self.momentum = momentum - self.centered = centered - - def __call__(self, submodel:nn.Module, param_group=None) -> Optimizer: - if param_group is not None: params = param_group - else: params = submodel.parameters() - self.opt = torch.optim.RMSprop(submodel.parameters(), lr = self.lr, alpha = self.alpha, - eps = self.eps, weight_decay = self.weight_decay, momentum = self.momentum, - centered = self.centered) - return self.opt - diff --git a/pytorch_widedeep/preprocessing/__init__.py b/pytorch_widedeep/preprocessing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5911edf0d68cfc100069bdc5e8cb4cf149cf2d5c --- /dev/null +++ b/pytorch_widedeep/preprocessing/__init__.py @@ -0,0 +1,4 @@ +from ._preprocessors import WidePreprocessor +from ._preprocessors import DeepPreprocessor +from ._preprocessors import TextPreprocessor +from ._preprocessors import ImagePreprocessor diff --git a/pytorch_widedeep/preprocessing.py b/pytorch_widedeep/preprocessing/_preprocessors.py similarity index 91% rename from pytorch_widedeep/preprocessing.py rename to pytorch_widedeep/preprocessing/_preprocessors.py index 9ba4ce813da07fd532e945ede3ac2cedf5f6682d..2915ce8b62a149c53729f8446e504dad51e2a08c 100644 --- a/pytorch_widedeep/preprocessing.py +++ b/pytorch_widedeep/preprocessing/_preprocessors.py @@ -6,14 +6,15 @@ import warnings from sklearn.preprocessing import OneHotEncoder from sklearn.preprocessing import StandardScaler from 
sklearn.utils.validation import check_is_fitted +from tqdm import tqdm -from .wdtypes import * -from .utils.deep_utils import label_encoder +from ..wdtypes import * +from .utils.dense_utils import * from .utils.text_utils import * from .utils.image_utils import * -class DataProcessor(object): +class BasePreprocessor(object): def __init__(self): pass @@ -28,10 +29,10 @@ class DataProcessor(object): pass -class WideProcessor(DataProcessor): +class WidePreprocessor(BasePreprocessor): def __init__(self, wide_cols:List[str], crossed_cols=None, already_dummies:Optional[List[str]]=None): - super(WideProcessor, self).__init__() + super(WidePreprocessor, self).__init__() self.wide_cols = wide_cols self.crossed_cols = crossed_cols self.already_dummies = already_dummies @@ -47,7 +48,7 @@ class WideProcessor(DataProcessor): crossed_colnames.append(colname) return df, crossed_colnames - def fit(self, df:pd.DataFrame)->DataProcessor: + def fit(self, df:pd.DataFrame)->BasePreprocessor: df_wide = df.copy()[self.wide_cols] if self.crossed_cols is not None: df_wide, crossed_colnames = self._cross_cols(df_wide) @@ -79,14 +80,14 @@ class WideProcessor(DataProcessor): return self.fit(df).transform(df) -class DeepProcessor(DataProcessor): +class DeepPreprocessor(BasePreprocessor): def __init__(self, embed_cols:List[Union[str,Tuple[str,int]]]=None, continuous_cols:List[str]=None, already_standard:Optional[List[str]]=None, scale:bool=True, default_embed_dim:int=8): - super(DeepProcessor, self).__init__() + super(DeepPreprocessor, self).__init__() self.embed_cols=embed_cols self.continuous_cols=continuous_cols @@ -113,7 +114,7 @@ class DeepProcessor(DataProcessor): else: self.standardize_cols = self.continuous_cols return df.copy()[self.continuous_cols] - def fit(self, df:pd.DataFrame)->DataProcessor: + def fit(self, df:pd.DataFrame)->BasePreprocessor: if self.embed_cols is not None: df_emb = self._prepare_embed(df) _, self.encoding_dict = label_encoder(df_emb, cols=df_emb.columns.tolist()) @@ -154,19 +155,19 @@ class DeepProcessor(DataProcessor): return self.fit(df).transform(df) -class TextProcessor(DataProcessor): - """docstring for TextProcessor""" +class TextPreprocessor(BasePreprocessor): + """docstring for TextPreprocessor""" def __init__(self, max_vocab:int=30000, min_freq:int=5, maxlen:int=80, word_vectors_path:Optional[str]=None, verbose:int=1): - super(TextProcessor, self).__init__() + super(TextPreprocessor, self).__init__() self.max_vocab = max_vocab self.min_freq = min_freq self.maxlen = maxlen self.word_vectors_path = word_vectors_path self.verbose = verbose - def fit(self, df:pd.DataFrame, text_col:str)->DataProcessor: + def fit(self, df:pd.DataFrame, text_col:str)->BasePreprocessor: text_col = text_col texts = df[text_col].tolist() tokens = get_texts(texts) @@ -190,15 +191,15 @@ class TextProcessor(DataProcessor): return self.fit(df, text_col).transform(df, text_col) -class ImageProcessor(DataProcessor): - """docstring for ImageProcessor""" +class ImagePreprocessor(BasePreprocessor): + """docstring for ImagePreprocessor""" def __init__(self, width:int=224, height:int=224, verbose:int=1): - super(ImageProcessor, self).__init__() + super(ImagePreprocessor, self).__init__() self.width = width self.height = height self.verbose = verbose - def fit(self)->DataProcessor: + def fit(self)->BasePreprocessor: self.aap = AspectAwarePreprocessor(self.width, self.height) self.spp = SimplePreprocessor(self.width, self.height) return self diff --git a/pytorch_widedeep/utils/__init__.py 
b/pytorch_widedeep/preprocessing/utils/__init__.py similarity index 100% rename from pytorch_widedeep/utils/__init__.py rename to pytorch_widedeep/preprocessing/utils/__init__.py diff --git a/pytorch_widedeep/utils/deep_utils.py b/pytorch_widedeep/preprocessing/utils/dense_utils.py similarity index 92% rename from pytorch_widedeep/utils/deep_utils.py rename to pytorch_widedeep/preprocessing/utils/dense_utils.py index a4e627513a6d4aa891d32cbb41127ab8e7d201d8..b031cffd025179ce0790b06aacc8a42cedebbd3c 100644 --- a/pytorch_widedeep/utils/deep_utils.py +++ b/pytorch_widedeep/preprocessing/utils/dense_utils.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -from ..wdtypes import * +from ...wdtypes import * pd.options.mode.chained_assignment = None diff --git a/pytorch_widedeep/utils/fastai_transforms.py b/pytorch_widedeep/preprocessing/utils/fastai_transforms.py similarity index 99% rename from pytorch_widedeep/utils/fastai_transforms.py rename to pytorch_widedeep/preprocessing/utils/fastai_transforms.py index 5ff6603169127babba7874ebf62da14354012cb0..2949a9b084eff7c8f913549b94355e26d0eb6bd1 100644 --- a/pytorch_widedeep/utils/fastai_transforms.py +++ b/pytorch_widedeep/preprocessing/utils/fastai_transforms.py @@ -8,7 +8,7 @@ way I avoid the numerous fastai dependencies. Credit for the code here to Jeremy Howard and the fastai team ''' -from ..wdtypes import * +from ...wdtypes import * import sys import os diff --git a/pytorch_widedeep/utils/image_utils.py b/pytorch_widedeep/preprocessing/utils/image_utils.py similarity index 90% rename from pytorch_widedeep/utils/image_utils.py rename to pytorch_widedeep/preprocessing/utils/image_utils.py index 2cd59bb9b6a0be45efed3c329464142bc6499c62..0c83a3d753ad1d17df38e3b27114275fe6f52715 100644 --- a/pytorch_widedeep/utils/image_utils.py +++ b/pytorch_widedeep/preprocessing/utils/image_utils.py @@ -8,16 +8,10 @@ Credit for the code here to ADRIAN ROSEBROCK ''' import numpy as np -import pandas as pd -import warnings import imutils import cv2 -from os import listdir -from tqdm import tqdm -from sklearn.utils.validation import check_is_fitted - -from ..wdtypes import * +from ...wdtypes import * class AspectAwarePreprocessor: diff --git a/pytorch_widedeep/utils/text_utils.py b/pytorch_widedeep/preprocessing/utils/text_utils.py similarity index 98% rename from pytorch_widedeep/utils/text_utils.py rename to pytorch_widedeep/preprocessing/utils/text_utils.py index 08c00e65e8f94057fce069d4b163af882c229135..f8fe6613a85a3e2f3fcdcef80a4f095c11428650 100644 --- a/pytorch_widedeep/utils/text_utils.py +++ b/pytorch_widedeep/preprocessing/utils/text_utils.py @@ -4,7 +4,7 @@ import html import os import re -from ..wdtypes import * +from ...wdtypes import * from .fastai_transforms import Tokenizer, Vocab from gensim.utils import tokenize diff --git a/setup.py b/setup.py index cb003ac83bbc0f6b1349cb82ab36394b1332f6e3..d3d54cd4610d4b45f022f9309f5b7ca60273b26c 100644 --- a/setup.py +++ b/setup.py @@ -46,8 +46,6 @@ setup_kwargs = { "imutils", "torch", "torchvision", - "fastai", - # "opencv-python", "tqdm"], 'classifiers': [ dev_status[majorminor], diff --git a/tests/test_data_utils/test_deep_dense.py b/tests/test_data_utils/test_deep_dense.py index 24d637550660859bcbba4e1e230cfb636b52facc..15861c3fc3af2dd3ff48179aab9182cd8c52c5a0 100644 --- a/tests/test_data_utils/test_deep_dense.py +++ b/tests/test_data_utils/test_deep_dense.py @@ -2,8 +2,8 @@ import numpy as np import pandas as pd import pytest -from pytorch_widedeep.utils.deep_utils import label_encoder -from 
pytorch_widedeep.preprocessing import DeepProcessor +from pytorch_widedeep.preprocessing.utils.dense_utils import label_encoder +from pytorch_widedeep.preprocessing import DeepPreprocessor def create_test_dataset(input_type, input_type_2=None): @@ -64,18 +64,18 @@ def test_label_encoder_with_custom_encoder(input_df, encoding_dict, output_df): assert tmp_df.equals(output_df) ################################################################################ -# Test the DeepProcessor: only categorical columns to be represented with +# Test the DeepPreprocessor: only categorical columns to be represented with # embeddings ############################################################################### cat_embed_cols = [('col1',5), ('col2',5)] -preprocessor1 = DeepProcessor(cat_embed_cols) +preprocessor1 = DeepPreprocessor(cat_embed_cols) X_letters = preprocessor1.fit_transform(df_letters) embed_input_letters = preprocessor1.embeddings_input decoding_dict_letters = {c: {k:v for v,k in preprocessor1.encoding_dict[c].items()} for c in preprocessor1.encoding_dict.keys()} -preprocessor2 = DeepProcessor(cat_embed_cols) +preprocessor2 = DeepPreprocessor(cat_embed_cols) X_numbers = preprocessor2.fit_transform(df_numbers) embed_input_numbers = preprocessor2.embeddings_input decoding_dict_numbers = {c: {k:v for v,k in preprocessor2.encoding_dict[c].items()} for c in preprocessor2.encoding_dict.keys()} @@ -105,7 +105,7 @@ def test_prepare_deep_without_continous_columns(input_df, X_deep, embed_input, d ################################################################################ -# Test the DeepProcessor: only continouos columns +# Test the DeepPreprocessor: only continouos columns ############################################################################### def test_prepare_deep_without_embedding_columns(): @@ -113,7 +113,7 @@ def test_prepare_deep_without_embedding_columns(): errors=[] df_randint = pd.DataFrame(np.random.choice(np.arange(100), (100,2))) df_randint.columns = ['col1', 'col2'] - preprocessor3 = DeepProcessor(continuous_cols=['col1', 'col2']) + preprocessor3 = DeepPreprocessor(continuous_cols=['col1', 'col2']) try: X_randint = preprocessor3.fit_transform(df_randint) diff --git a/tests/test_data_utils/test_deep_image.py b/tests/test_data_utils/test_deep_image.py index f46362a5a0e514e707ed9a1e18b5bc1944e183a0..15f5c26fee97311ccba3c37280491581ddbf37c0 100644 --- a/tests/test_data_utils/test_deep_image.py +++ b/tests/test_data_utils/test_deep_image.py @@ -2,12 +2,12 @@ import numpy as np import pandas as pd import pytest -from pytorch_widedeep.preprocessing import ImageProcessor +from pytorch_widedeep.preprocessing import ImagePreprocessor df = pd.DataFrame({'galaxies': ['galaxy1.png', 'galaxy2.png']}) img_col = 'galaxies' imd_dir = 'images' -processor = ImageProcessor() +processor = ImagePreprocessor() X_imgs = processor.fit_transform(df, img_col, img_path=imd_dir) ############################################################################### diff --git a/tests/test_data_utils/test_deep_text.py b/tests/test_data_utils/test_deep_text.py index 62d6e467c6ff7ea78b6fc8bfc3334459c52562e7..29b452bd91bbb84f6b2fa7db542692902dab1ab5 100644 --- a/tests/test_data_utils/test_deep_text.py +++ b/tests/test_data_utils/test_deep_text.py @@ -4,11 +4,11 @@ import pytest import warnings from sklearn.datasets import fetch_20newsgroups -from pytorch_widedeep.preprocessing import TextProcessor +from pytorch_widedeep.preprocessing import TextPreprocessor texts = np.random.choice(fetch_20newsgroups().data, 10) df = 
pd.DataFrame({'texts':texts})
-processor = TextProcessor(min_freq=0)
+processor = TextPreprocessor(min_freq=0)
 X_text = processor.fit_transform(df, 'texts')
 ###############################################################################
diff --git a/tests/test_data_utils/test_wide.py b/tests/test_data_utils/test_wide.py
index f7dc7cf6494e464021357619d6f0da88efaffd25..c5005b252c71007760a53338f1f8b9fc5627a806 100644
--- a/tests/test_data_utils/test_wide.py
+++ b/tests/test_data_utils/test_wide.py
@@ -2,7 +2,7 @@ import numpy as np
 import pandas as pd
 import pytest
 
-from pytorch_widedeep.preprocessing import WideProcessor
+from pytorch_widedeep.preprocessing import WidePreprocessor
 
 
 def create_test_dataset(input_type, with_crossed=True):
@@ -29,7 +29,7 @@ cross_cols = [('col1', 'col2')]
 df_letters, unique_letters = create_test_dataset(some_letters)
 df_numbers, unique_numbers = create_test_dataset(some_numbers)
 
-preprocessor1 = WideProcessor(wide_cols, cross_cols)
+preprocessor1 = WidePreprocessor(wide_cols, cross_cols)
 
 @pytest.mark.parametrize('input_df, expected_shape',
     [
@@ -48,7 +48,7 @@ def test_preprocessor1(input_df, expected_shape):
 df_letters_wo_crossed, unique_letters_wo_crossed = create_test_dataset(some_letters, with_crossed=False)
 df_numbers_wo_crossed, unique_numbers_wo_crossed = create_test_dataset(some_numbers, with_crossed=False)
 
-preprocessor2 = WideProcessor(wide_cols)
+preprocessor2 = WidePreprocessor(wide_cols)
 
 @pytest.mark.parametrize('input_df, expected_shape',
     [
diff --git a/tests/test_model_functioning/test_callbacks.py b/tests/test_model_functioning/test_callbacks.py
index b03a3f0b04ba10a90e380addd500a623d7993943..cb07a68b2c449ef7856f13e67a9675340ab6e8f0 100644
--- a/tests/test_model_functioning/test_callbacks.py
+++ b/tests/test_model_functioning/test_callbacks.py
@@ -5,13 +5,13 @@ import torch
 import pytest
 
 from torch import nn
+from torch.optim.lr_scheduler import StepLR, CyclicLR
 from itertools import chain
 
 from pytorch_widedeep.models import Wide, DeepDense, DeepText, DeepImage, WideDeep
-from pytorch_widedeep.optimizers import Adam, RAdam, SGD, RMSprop
-from pytorch_widedeep.lr_schedulers import (StepLR, MultiStepLR, ExponentialLR,
-    ReduceLROnPlateau, CyclicLR, OneCycleLR)
 from pytorch_widedeep.callbacks import ModelCheckpoint, EarlyStopping, LRHistory
+from pytorch_widedeep.optim import RAdam
+
 
 # Wide array
 X_wide=np.random.choice(2, (100, 100), p = [0.8, 0.2])
@@ -35,11 +35,26 @@ target = np.random.choice(2, 100)
 ###############################################################################
 # Test that history saves the information adequately
 ###############################################################################
-optimizers_1 = { 'wide': Adam, 'deepdense': Adam}
-lr_schedulers_1 = { 'wide': StepLR(step_size=4), 'deepdense': CyclicLR(base_lr=0.001, max_lr=0.01, step_size_up=10, cycle_momentum=False)}
+wide = Wide(100, 1)
+deepdense = DeepDense(hidden_layers=[32,16], dropout=[0.5], deep_column_idx=deep_column_idx,
+    embed_input=embed_input, continuous_cols=colnames[-5:], output_dim=1)
+model = WideDeep(wide=wide, deepdense=deepdense)
+
+wide_opt_1 = torch.optim.Adam(model.wide.parameters())
+deep_opt_1 = torch.optim.Adam(model.deepdense.parameters())
+wide_sch_1 = StepLR(wide_opt_1, step_size=4)
+deep_sch_1 = CyclicLR(deep_opt_1, base_lr=0.001, max_lr=0.01, step_size_up=10, cycle_momentum=False)
+optimizers_1 = {'wide': wide_opt_1, 'deepdense': deep_opt_1}
+lr_schedulers_1 = {'wide': wide_sch_1, 'deepdense': deep_sch_1}
+
+wide_opt_2 = torch.optim.Adam(model.wide.parameters())
+deep_opt_2 = RAdam(model.deepdense.parameters())
+wide_sch_2 = StepLR(wide_opt_2, step_size=4)
+deep_sch_2 = StepLR(deep_opt_2, step_size=4)
+optimizers_2 = { 'wide': wide_opt_2, 'deepdense':deep_opt_2}
+lr_schedulers_2 = {'wide': wide_sch_2,'deepdense': deep_sch_2}
+
-optimizers_2 = { 'wide': Adam, 'deepdense':RAdam}
-lr_schedulers_2 = {'wide': StepLR(step_size=4),'deepdense': StepLR(step_size=4)}
 
 @pytest.mark.parametrize(
     'optimizers, schedulers, len_loss_output, len_lr_output',
     [
@@ -48,10 +63,6 @@ lr_schedulers_2 = {'wide': StepLR(step_size=4),'deepdense': StepLR(step_size=4)}
     ]
 )
 def test_history_callback(optimizers, schedulers, len_loss_output, len_lr_output):
-    wide = Wide(100, 1)
-    deepdense = DeepDense(hidden_layers=[32,16], dropout=[0.5], deep_column_idx=deep_column_idx,
-        embed_input=embed_input, continuous_cols=colnames[-5:], output_dim=1)
-    model = WideDeep(wide=wide, deepdense=deepdense)
     model.compile(method='logistic', optimizers=optimizers, lr_schedulers=schedulers,
         callbacks=[LRHistory], verbose=0)
     model.fit(X_wide=X_wide, X_deep=X_deep, X_text=X_text, target=target, n_epochs=5)
diff --git a/tests/test_model_functioning/test_optimizers.py b/tests/test_model_functioning/test_optimizers.py
deleted file mode 100644
index b93134cf337b290b43b24ff7ad8490bf425d85ee..0000000000000000000000000000000000000000
--- a/tests/test_model_functioning/test_optimizers.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import numpy as np
-import string
-import torch
-import pytest
-
-from torch import nn
-from pytorch_widedeep.models import Wide, DeepDense, DeepText, DeepImage, WideDeep
-from pytorch_widedeep.optimizers import Adam, RAdam, SGD, RMSprop
-from copy import deepcopy as c
-
-# Wide array
-X_wide=np.random.choice(2, (100, 100), p = [0.8, 0.2])
-
-# Deep Array
-colnames = list(string.ascii_lowercase)[:10]
-embed_cols = [np.random.choice(np.arange(5), 100) for _ in range(5)]
-embed_input = [(u,i,j) for u,i,j in zip(colnames[:5], [5]*5, [16]*5)]
-cont_cols = [np.random.rand(100) for _ in range(5)]
-deep_column_idx={k:v for v,k in enumerate(colnames)}
-X_deep = np.vstack(embed_cols+cont_cols).transpose()
-
-# Text Array
-padded_sequences = np.random.choice(np.arange(1,100), (100, 48))
-vocab_size = 1000
-X_text = np.hstack((np.repeat(np.array([[0,0]]), 100, axis=0), padded_sequences))
-
-# Image Array
-X_img = np.random.choice(256, (100, 224, 224, 3))
-
-optimizers_1 = { 'wide': Adam, 'deepdense':RAdam, 'deeptext': SGD, 'deepimage':RMSprop}
-optimizers_2 = { 'wide': RAdam, 'deepdense':SGD, 'deeptext': RMSprop}
-
-###############################################################################
-# Test that the MultipleOptimizer class functions as expected
-###############################################################################
-@pytest.mark.parametrize("optimizers, expected_opt",
-    [
-    (optimizers_1, { 'wide': 'Adam', 'deepdense':'RAdam', 'deeptext': 'SGD', 'deepimage':'RMSprop'}),
-    (optimizers_2, { 'wide': 'RAdam', 'deepdense':'SGD', 'deeptext': 'RMSprop', 'deepimage': 'Adam'}),
-    ],
-)
-def test_optimizers(optimizers, expected_opt):
-    wide = Wide(100, 1)
-    deepdense = DeepDense(hidden_layers=[32,16], dropout=[0.5], deep_column_idx=deep_column_idx,
-        embed_input=embed_input, continuous_cols=colnames[-5:], output_dim=1)
-    deeptext = DeepText( vocab_size=vocab_size, embed_dim=32, padding_idx=0)
-    deepimage=DeepImage(pretrained=True)
-    model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage)
-    model.compile(method='logistic', optimizers=optimizers)
-    out = []
-    for name, opt in model.optimizer._optimizers.items():
-        out.append(expected_opt[name] == opt.__class__.__name__ )
-    assert all(out)
-
diff --git a/tests/test_model_functioning/test_schedulers.py b/tests/test_model_functioning/test_schedulers.py
deleted file mode 100644
index d32c36ff325309a345958c807e3c5a59f3da4f4f..0000000000000000000000000000000000000000
--- a/tests/test_model_functioning/test_schedulers.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import numpy as np
-import string
-import torch
-import pytest
-
-from torch import nn
-from pytorch_widedeep.models import Wide, DeepDense, DeepText, DeepImage, WideDeep
-from pytorch_widedeep.optimizers import Adam, RAdam, SGD, RMSprop
-from pytorch_widedeep.lr_schedulers import (StepLR, MultiStepLR, ExponentialLR,
-    ReduceLROnPlateau, CyclicLR, OneCycleLR)
-from copy import deepcopy as c
-
-
-# Wide array
-X_wide=np.random.choice(2, (100, 100), p = [0.8, 0.2])
-
-# Deep Array
-colnames = list(string.ascii_lowercase)[:10]
-embed_cols = [np.random.choice(np.arange(5), 100) for _ in range(5)]
-embed_input = [(u,i,j) for u,i,j in zip(colnames[:5], [5]*5, [16]*5)]
-cont_cols = [np.random.rand(100) for _ in range(5)]
-deep_column_idx={k:v for v,k in enumerate(colnames)}
-X_deep = np.vstack(embed_cols+cont_cols).transpose()
-
-# Text Array
-padded_sequences = np.random.choice(np.arange(1,100), (100, 48))
-vocab_size = 1000
-X_text = np.hstack((np.repeat(np.array([[0,0]]), 100, axis=0), padded_sequences))
-
-# target
-target = np.random.choice(2, 100)
-
-###############################################################################
-# Test that the Step based and Exponential Schedulers functions as expected.
-###############################################################################
-def test_step_and_exp_lr_schedulers():
-
-    optimizers = { 'wide': Adam, 'deepdense':RAdam, 'deeptext': SGD}
-    lr_schedulers = { 'wide': StepLR(step_size=4), 'deepdense':MultiStepLR(milestones=[2,8]),
-        'deeptext': ExponentialLR(gamma=0.5)}
-
-    wide = Wide(100, 1)
-    deepdense = DeepDense(hidden_layers=[32,16], dropout=[0.5], deep_column_idx=deep_column_idx,
-        embed_input=embed_input, continuous_cols=colnames[-5:], output_dim=1)
-    deeptext = DeepText( vocab_size=vocab_size, embed_dim=32, padding_idx=0)
-    model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext)
-    model.compile(method='logistic', optimizers=optimizers, lr_schedulers=lr_schedulers,
-        verbose=1)
-    model.fit(X_wide=X_wide, X_deep=X_deep, X_text=X_text, target=target,
-        n_epochs=10)
-
-    out = []
-    out.append(
-        model.optimizer._optimizers['wide'].param_groups[0]['initial_lr'] * 0.1**2 == \
-        model.optimizer._optimizers['wide'].param_groups[0]['lr']
-        )
-    out.append(
-        model.optimizer._optimizers['deepdense'].param_groups[0]['initial_lr'] * 0.1**2 == \
-        model.optimizer._optimizers['deepdense'].param_groups[0]['lr']
-        )
-    out.append(
-        model.optimizer._optimizers['deeptext'].param_groups[0]['initial_lr'] * 0.5**10 == \
-        model.optimizer._optimizers['deeptext'].param_groups[0]['lr'])
-
-    assert all(out)
-
-###############################################################################
-# Test that the Cyclic Schedulers functions as expected. At the time of
-# writting there is an issue related to the torch_shm_manager in torch v1.3.0
-# for pip + OSX. Therefore, I have not tested OneCycleLR which os only
-# available for v1.3.0.
-###############################################################################
-def test_cyclic_lr_schedulers():
-
-    optimizers = { 'wide': Adam(lr=0.001), 'deepdense':Adam(lr=0.001)}
-    lr_schedulers = {
-        'wide': CyclicLR(base_lr=0.001, max_lr=0.01, step_size_up=20, cycle_momentum=False),
-        'deepdense': CyclicLR(base_lr=0.001, max_lr=0.01, step_size_up=10, cycle_momentum=False)}
-
-    wide = Wide(100, 1)
-    deepdense = DeepDense(hidden_layers=[32,16], dropout=[0.5], deep_column_idx=deep_column_idx,
-        embed_input=embed_input, continuous_cols=colnames[-5:], output_dim=1)
-    model = WideDeep(wide=wide, deepdense=deepdense)
-    model.compile(method='logistic', optimizers=optimizers, lr_schedulers=lr_schedulers,
-        verbose=0)
-    model.fit(X_wide=X_wide, X_deep=X_deep, X_text=X_text, target=target,
-        n_epochs=5)
-
-    out = []
-    out.append(
-        np.isclose(model.optimizer._optimizers['wide'].param_groups[0]['lr'], 0.01)
-        )
-    out.append(
-        model.optimizer._optimizers['deepdense'].param_groups[0]['initial_lr'] == \
-        model.optimizer._optimizers['deepdense'].param_groups[0]['lr']
-        )
-
-    assert all(out)
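Note on the resulting API (not part of the diff): taken together, these changes drop the in-house optimizer and scheduler wrappers in favour of plain torch.optim optimizers (plus pytorch_widedeep.optim.RAdam) and standard torch.optim.lr_scheduler objects, instantiated per model component and passed to compile as dicts keyed by component name. The sketch below only illustrates that usage pattern; it assumes the preprocessed objects from examples/main_adult.py (X_wide, X_deep, target, embed_input, deep_column_idx, continuous_cols) are already in scope, and exact signatures may differ from the released API.

import torch

from pytorch_widedeep.models import Wide, DeepDense, WideDeep
from pytorch_widedeep.optim import RAdam
from pytorch_widedeep.callbacks import LRHistory

# Model components; hyperparameters here are illustrative only
wide = Wide(X_wide.shape[1], 1)
deepdense = DeepDense(hidden_layers=[64, 32], dropout=[0.5],
                      deep_column_idx=deep_column_idx, embed_input=embed_input,
                      continuous_cols=continuous_cols, output_dim=1)
model = WideDeep(wide=wide, deepdense=deepdense)

# One optimizer per component, built directly on that component's parameters
wide_opt = torch.optim.Adam(model.wide.parameters())
deep_opt = RAdam(model.deepdense.parameters())

# Schedulers are plain torch.optim.lr_scheduler objects bound to those optimizers
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)

# Both are passed to compile as dicts keyed by component name; LRHistory
# records the per-component learning rates over training
model.compile(method='logistic',
              optimizers={'wide': wide_opt, 'deepdense': deep_opt},
              lr_schedulers={'wide': wide_sch, 'deepdense': deep_sch},
              callbacks=[LRHistory])
model.fit(X_wide=X_wide, X_deep=X_deep, target=target, n_epochs=5, batch_size=256)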