Commit e2b8ccbe authored by J jrzaurin

Added documentation for the model components. Rearranged modules to expose utils.

Parent 333ffc9c
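The "expose utils" part of this commit moves the utils modules up one package level (the relative imports further down change from .utils.* / ...wdtypes to ..utils.* / ..wdtypes). A minimal sketch of what the exposed imports would then look like; the exact public paths are an assumption inferred from those relative-import changes, not something this diff states:

# Assumed import paths after the re-arrangement (pytorch_widedeep.utils is inferred, not shown in the diff)
from pytorch_widedeep.utils.text_utils import pad_sequences, build_embeddings_matrix
from pytorch_widedeep.utils.fastai_transforms import Tokenizer, Vocab
from pytorch_widedeep.utils.image_utils import AspectAwarePreprocessor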
......@@ -31,7 +31,6 @@ if __name__ == '__main__':
('occupation',10),('native_country',10)]
continuous_cols = ["age","hours_per_week"]
target = 'income_label'
target = df[target].values
prepare_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)
X_wide = prepare_wide.fit_transform(df)
......@@ -42,7 +41,7 @@ if __name__ == '__main__':
output_dim=1)
deepdense = DeepDense(
hidden_layers=[64,32],
dropout=[0.5],
dropout=[0.2,0.2],
deep_column_idx=prepare_deep.deep_column_idx,
embed_input=prepare_deep.embeddings_input,
continuous_cols=continuous_cols)
......@@ -56,14 +55,14 @@ if __name__ == '__main__':
optimizers = {'wide': wide_opt, 'deepdense':deep_opt}
schedulers = {'wide': wide_sch, 'deepdense':deep_sch}
initializers = {'wide': KaimingNormal, 'deepdense':XavierNormal}
callbacks = [LRHistory, EarlyStopping, ModelCheckpoint(filepath='../model_weights/wd_out')]
callbacks = [LRHistory(n_epochs=10), EarlyStopping, ModelCheckpoint(filepath='../model_weights/wd_out')]
metrics = [BinaryAccuracy]
model.compile(
method='logistic',
initializers=initializers,
method='binary',
optimizers=optimizers,
lr_schedulers=schedulers,
initializers=initializers,
callbacks=callbacks,
metrics=metrics)
......
......@@ -49,7 +49,7 @@ if __name__ == '__main__':
output_dim=1)
deepdense = DeepDense(
hidden_layers=[64,32],
dropout=[0.5],
dropout=[0.2,0.2],
deep_column_idx=prepare_deep.deep_column_idx,
embed_input=prepare_deep.embeddings_input,
continuous_cols=continuous_cols)
......@@ -58,31 +58,26 @@ if __name__ == '__main__':
hidden_dim=64,
n_layers=3,
rnn_dropout=0.5,
spatial_dropout=0.5,
padding_idx=1,
embedding_matrix=text_processor.embedding_matrix
)
deepimage = DeepImage(pretrained=True, head_layers=None)
model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext,
deepimage=deepimage, head_layers=[256, 128, 64])
# pdb.set_trace()
deepimage=deepimage)
wide_opt = torch.optim.Adam(model.wide.parameters())
deep_opt = torch.optim.Adam(model.deepdense.parameters())
text_opt = RAdam(model.deeptext.parameters())
img_opt = RAdam(model.deepimage.parameters())
head_opt = torch.optim.Adam(model.head.parameters())
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)
text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)
img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=3)
head_sch = torch.optim.lr_scheduler.StepLR(head_opt, step_size=5)
optimizers = {'wide': wide_opt, 'deepdense':deep_opt, 'deeptext':text_opt, 'deepimage': img_opt, 'head': head_opt}
schedulers = {'wide': wide_sch, 'deepdense':deep_sch, 'deeptext':text_sch, 'deepimage': img_sch, 'head': head_sch}
initializers = {'wide': KaimingNormal, 'deepdense':KaimingNormal, 'deeptext':KaimingNormal, 'deepimage':KaimingNormal,
'head': KaimingNormal}
optimizers = {'wide': wide_opt, 'deepdense':deep_opt, 'deeptext':text_opt, 'deepimage': img_opt}
schedulers = {'wide': wide_sch, 'deepdense':deep_sch, 'deeptext':text_sch, 'deepimage': img_sch}
initializers = {'wide': KaimingNormal, 'deepdense':KaimingNormal, 'deeptext':KaimingNormal, 'deepimage':KaimingNormal}
mean = [0.406, 0.456, 0.485] #BGR
std = [0.225, 0.224, 0.229] #BGR
transforms = [ToTensor, Normalize(mean=mean, std=std)]
......
......@@ -132,7 +132,9 @@ class History(Callback):
class LRHistory(Callback):
def __init__(self, n_epochs):
super(LRHistory, self).__init__()
self.n_epochs = n_epochs
def on_epoch_begin(self, epoch:int, logs:Optional[Dict]=None):
if epoch==0 and self.model.lr_scheduler:
......@@ -165,7 +167,7 @@ class LRHistory(Callback):
("_").join(['lr', str(group_idx)]),[]).append(group['lr'])
def on_epoch_end(self, epoch:int, logs:Optional[Dict]=None):
if self.model.lr_scheduler:
if epoch != (self.n_epochs-1) and self.model.lr_scheduler:
if self.model.lr_scheduler.__class__.__name__ == 'MultipleLRScheduler':
for model_name, opt in self.model.optimizer._optimizers.items():
if model_name in self.model.lr_scheduler._schedulers:
......
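With this change LRHistory must be told the total number of epochs at construction time (it stops appending learning rates once the final epoch is reached). A minimal sketch mirroring the adult script above; the epoch count is assumed to be the same one later used for training:

n_epochs = 10
callbacks = [LRHistory(n_epochs=n_epochs),
             EarlyStopping,
             ModelCheckpoint(filepath='../model_weights/wd_out')]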
......@@ -22,6 +22,54 @@ def dense_layer(inp:int, out:int, dropout:float, batchnorm=False):
class DeepDense(nn.Module):
r"""Dense branch of the deep side of the model. This class combines embedding
representations of the categorical features with numerical (aka
continuous) features. These are then passed through a series of dense
layers.
Parameters
----------
deep_column_idx: Dict containing the index of the columns that will be
passed through the DeepDense model. Required to slice the tensors. e.g.
{'education': 0, 'relationship': 1, 'workclass': 2, ...}
hidden_layers: List with the number of neurons per dense layer. e.g: [64,32]
dropout: Optional List with the dropout between the dense layers.
e.g: [0.5,0.5]
batchnorm: Optional Boolean indicating whether or not to include batch
normalization in the dense layers
embed_input: Optional List of Tuples with the column name, number of
unique values and embedding dimension. e.g. [(education, 11, 32), ...]
continuous_cols: Optional List with the names of the numeric (aka
continuous) columns
**Either embed_input or continuous_cols (or both) should be passed to the
model
Attributes
----------
dense: nn.Sequential model of dense layers that will receive the
concatenation of the embeddings and the continuous columns
embed_layers: nn.ModuleDict with the embedding layers
output_dim: integer containing the output dimension of the model. This
attribute is necessary to build the WideDeep class
Example
--------
>>> import torch
>>> from pytorch_widedeep.models import DeepDense
>>> X_deep = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
>>> colnames = ['a', 'b', 'c', 'd', 'e']
>>> embed_input = [(u,i,j) for u,i,j in zip(colnames[:4], [4]*4, [8]*4)]
>>> deep_column_idx = {k:v for v,k in enumerate(colnames)}
>>> model = DeepDense(hidden_layers=[8,4], deep_column_idx=deep_column_idx, embed_input=embed_input)
>>> model(X_deep)
tensor([[ 3.4470e-02, -2.0089e-03, 4.7983e-02, 3.3500e-01],
[ 1.4329e-02, -1.3800e-03, -3.3617e-04, 4.1046e-01],
[-3.3546e-04, 3.2413e-02, -4.1198e-03, 4.8717e-01],
[-6.7882e-04, 7.9103e-03, -1.9960e-03, 4.2134e-01],
[ 6.7187e-02, -1.2821e-03, -3.0960e-04, 3.6123e-01]],
grad_fn=<LeakyReluBackward1>)
"""
def __init__(self,
deep_column_idx:Dict[str,int],
hidden_layers:List[int],
......@@ -50,7 +98,7 @@ class DeepDense(nn.Module):
# Dense Layers
input_dim = emb_inp_dim + cont_inp_dim
hidden_layers = [input_dim] + hidden_layers
dropout = [0.] + dropout if dropout is not None else [0.]*(len(hidden_layers)-1)
if not dropout: dropout = [0.]*len(hidden_layers)
batchnorm = batchnorm if batchnorm is not None else False
self.dense = nn.Sequential()
for i in range(1, len(hidden_layers)):
......
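The docstring example above uses embeddings only. A minimal sketch of the mixed case, embeddings plus one continuous column, reusing the docstring's toy data and the keyword names from the adult script; treating column 'e' as continuous is the only assumption:

import torch
from pytorch_widedeep.models import DeepDense

# four categorical columns (integer codes 0-3) followed by one continuous column
X_deep = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), dim=1)
colnames = ['a', 'b', 'c', 'd', 'e']
embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4]*4, [8]*4)]
deep_column_idx = {k: v for v, k in enumerate(colnames)}
model = DeepDense(hidden_layers=[8, 4], dropout=[0.2, 0.2],
                  deep_column_idx=deep_column_idx, embed_input=embed_input,
                  continuous_cols=['e'])
out = model(X_deep)   # shape (5, 4): the size of the last hidden layer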
......@@ -20,6 +20,52 @@ def conv_layer(ni:int, nf:int, ks:int=3, stride:int=1, maxpool:bool=True,
class DeepImage(nn.Module):
r"""
Standard image classifier/regressor using a pretrained network, freezing
some of the first layers or all of them. I use Resnets, which have 9
"components" before the last dense layers.
The first 4 are: conv->batchnorm->relu->maxpool.
After that come 4 additional 'layers' (resnet blocks), so 4+4=8, each
composed of a series of convolutions, followed by the final
AdaptiveAvgPool2d (8+1=9). The parameter freeze sets the number of layers
to be frozen. For example, freeze=6 will freeze all but the last 2 layers
and the AdaptiveAvgPool2d layer. If freeze='all' the entire network is
frozen. In addition, there is the option to add a Fully Connected (FC) set
of dense layers (the FC-Head, referred to as 'imagehead') on top of the
backbone
Parameters
----------
pretrained: boolean that indicates whether or not we use a pretrained Resnet network
or a series of conv layers (see conv_layer function)
resnet: int indicating the resnet architecture. One of 18, 34 or 50
freeze: int or string indicating the number of layers to freeze. If an int,
it must be less than 8
head_layers: optional list with the sizes of the stacked dense layers in the head
e.g: [128, 64]
head_dropout: optional list with the dropout between the dense layers.
e.g: [0.5, 0.5].
head_batchnorm: Optional Boolean indicating whether or not to include batch
normalization in the dense layers that form the imagehead
Attributes
----------
backbone: Sequential stack of CNNs comprising the 'backbone' of the network
imagehead: Sequential stack of dense layers comprising the FC-Head (aka imagehead)
output_dim: integer containing the output dimension of the model. This
attribute is necessary to build the WideDeep class
Example
--------
>>> import torch
>>> from pytorch_widedeep.models import DeepImage
>>> X_img = torch.rand((2,3,224,224))
>>> model = DeepImage(head_layers=[512, 64, 8])
>>> model(X_img)
tensor([[ 7.7234e-02, 8.0923e-02, 2.3077e-01, -5.1122e-03, -4.3018e-03,
3.1193e-01, 3.0780e-01, 6.5098e-01],
[ 4.6191e-02, 6.7856e-02, -3.0163e-04, -3.7670e-03, -2.1437e-03,
1.5416e-01, 3.9227e-01, 5.5048e-01]], grad_fn=<LeakyReluBackward1>)
"""
def __init__(self,
pretrained:bool=True,
......@@ -29,20 +75,6 @@ class DeepImage(nn.Module):
head_dropout:Optional[List[float]]=None,
head_batchnorm:Optional[bool] = False):
super(DeepImage, self).__init__()
"""
Standard image classifier/regressor using a pretrained network
freezing some of the first layers (or all layers).
I use Resnets which have 9 "components" before the last dense layers.
The first 4 are: conv->batchnorm->relu->maxpool.
After that we have 4 additional 'layers' (so 4+4=8) comprised by a
series of convolutions and then the final AdaptiveAvgPool2d (8+1=9).
The parameter freeze sets the last layer to be frozen. For example,
freeze=6 will freeze all but the last 2 Layers and AdaptiveAvgPool2d
layer. If freeze='all' it freezes the entire network.
"""
self.head_layers = head_layers
......@@ -86,9 +118,14 @@ class DeepImage(nn.Module):
self.output_dim = 512
if self.head_layers is not None:
self.head = nn.Sequential()
assert self.head_layers[0]==self.output_dim, (
"The output dimension from the backbone ({}) is not consistent with "
"the expected input dimension ({}) of the fc-head".format(
self.output_dim, self.head_layers[0]))
if not head_dropout: head_dropout = [0.]*len(head_layers)
self.imagehead = nn.Sequential()
for i in range(1, len(head_layers)):
self.head.add_module(
self.imagehead.add_module(
'dense_layer_{}'.format(i-1),
dense_layer(head_layers[i-1], head_layers[i], head_dropout[i-1], head_batchnorm)
)
......@@ -98,7 +135,7 @@ class DeepImage(nn.Module):
x = self.backbone(x)
x = x.view(x.size(0), -1)
if self.head_layers is not None:
out = self.head(x)
out = self.imagehead(x)
return out
else:
return x
\ No newline at end of file
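A sketch of the freezing behaviour described in the DeepImage docstring, assuming the default pretrained resnet backbone whose output dimension is 512 and adding the optional imagehead:

import torch
from pytorch_widedeep.models import DeepImage

# freeze=6 leaves the last two resnet blocks and the AdaptiveAvgPool2d layer trainable
model = DeepImage(pretrained=True, freeze=6,
                  head_layers=[512, 256, 64], head_dropout=[0.5, 0.5])
X_img = torch.rand(2, 3, 224, 224)
out = model(X_img)   # shape (2, 64): the size of the last dense layer in the imagehead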
......@@ -4,22 +4,68 @@ import warnings
from torch import nn
from ..wdtypes import *
from .deep_dense import dense_layer
class DeepText(nn.Module):
r"""Standard text classifier/regressor comprised by a stack of RNNs (LSTMs).
In addition, there is the option to add a Fully Connected (FC) set of dense
layers (FC-Head, referred as 'texthead') on top of the stack of RNNs
Parameters
----------
vocab_size: number of words in the vocabulary
hidden_dim: number of features in the hidden state h of the LSTM
n_layers: number of recurrent layers
rnn_dropout: dropout layer on the outputs of each LSTM layer except the last
layer
bidirectional: boolean indicating whether the stacked RNNs are bidirectional
padding_idx: index of the padding token in the padded-tokenised sequences.
default: 1. I use the fastai tokenizer where the token index 0 is
reserved for the unknown word token
embed_dim: optional integer indicating the dimension of the word embedding matrix
embedding_matrix: optional array with pretrained word embeddings
head_layers: optional list with the sizes of the stacked dense layers in the head
e.g: [128, 64]
head_dropout: optional list with the dropout between the dense layers.
e.g: [0.5, 0.5].
head_batchnorm: Optional Boolean indicating whether or not to include batch
normalization in the dense layers that form the texthead
Attributes
----------
word_embed: Module with the word embedding matrix
rnn: Module with the stack of LSTMs
texthead: optional Sequential stack of dense layers
output_dim: integer containing the output dimension of the model. This
attribute is necessary to build the WideDeep class
Example
--------
>>> import torch
>>> from pytorch_widedeep.models import DeepText
>>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)
>>> model = DeepText(vocab_size=4, hidden_dim=4, n_layers=1, padding_idx=0, embed_dim=4)
>>> model(X_text)
tensor([[ 0.0315, 0.0393, -0.0618, -0.0561],
[-0.0674, 0.0297, -0.1118, -0.0668],
[-0.0446, 0.0814, -0.0921, -0.0338],
[-0.0844, 0.0681, -0.1016, -0.0464],
[-0.0268, 0.0294, -0.0988, -0.0666]], grad_fn=<SelectBackward>)
"""
def __init__(self,
vocab_size:int,
hidden_dim:int=64,
n_layers:int=3,
rnn_dropout:float=0.,
padding_idx:int=1,
bidirectional:bool=False,
padding_idx:int=1,
embed_dim:Optional[int]=None,
embedding_matrix:Optional[np.ndarray]=None):
embedding_matrix:Optional[np.ndarray]=None,
head_layers:Optional[List[int]] = None,
head_dropout:Optional[List[float]]=None,
head_batchnorm:Optional[bool] = False):
super(DeepText, self).__init__()
"""
Standard Text Classifier/Regressor with a stack of RNNs.
"""
if embed_dim is not None and embedding_matrix is not None and not embed_dim==embedding_matrix.shape[1]:
warnings.warn(
......@@ -29,7 +75,7 @@ class DeepText(nn.Module):
embedding_matrix.shape[1]), UserWarning)
self.bidirectional = bidirectional
self.word_embed_dropout = nn.Dropout2d(spatial_dropout)
self.head_layers = head_layers
# Pre-trained Embeddings
if isinstance(embedding_matrix, np.ndarray):
......@@ -50,6 +96,20 @@ class DeepText(nn.Module):
# the output_dim attribute will be used as input_dim when "merging" the models
self.output_dim = hidden_dim*2 if bidirectional else hidden_dim
if self.head_layers is not None:
assert self.head_layers[0]==self.output_dim, (
"The output dimension from the stack or RNNs ({}) is not consistent with "
"the expected input dimension ({}) of the fc-head".format(
self.output_dim, self.head_layers[0]))
if not head_dropout: head_dropout = [0.]*len(head_layers)
self.texthead = nn.Sequential()
for i in range(1, len(head_layers)):
self.texthead.add_module(
'dense_layer_{}'.format(i-1),
dense_layer(head_layers[i-1], head_layers[i], head_dropout[i-1], head_batchnorm)
)
self.output_dim = head_layers[-1]
def forward(self, X:Tensor)->Tensor:
embed = self.word_embed(X.long())
......@@ -58,4 +118,8 @@ class DeepText(nn.Module):
last_h = torch.cat((h[-2], h[-1]), dim = 1)
else:
last_h = h[-1]
return last_h
if self.head_layers is not None:
out = self.texthead(last_h)
return out
else:
return last_h
\ No newline at end of file
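A sketch of the new optional FC-Head ('texthead') on top of the stack of RNNs, reusing the toy inputs from the DeepText docstring; note that head_layers[0] has to match the RNN output dimension (hidden_dim, or hidden_dim*2 if bidirectional):

import torch
from pytorch_widedeep.models import DeepText

X_text = torch.cat((torch.zeros([5, 1]), torch.empty(5, 4).random_(1, 4)), dim=1)
model = DeepText(vocab_size=4, hidden_dim=8, n_layers=1, padding_idx=0, embed_dim=4,
                 head_layers=[8, 4], head_dropout=[0.5])
out = model(X_text)   # shape (5, 4): the size of the last dense layer in the texthead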
......@@ -4,6 +4,30 @@ from torch import nn
from ..wdtypes import *
class Wide(nn.Module):
r"""simple linear layer between the one-hot encoded wide input and the output
neuron.
Parameters
----------
wide_dim: number of features (columns) of the one-hot encoded wide input
output_dim: size of the output tensor
Attributes
----------
wide_linear: the linear layer that comprises the wide branch of the model
Example
--------
>>> import torch
>>> from pytorch_widedeep.models import Wide
>>> X = torch.empty(4, 4).random_(2)
>>> wide = Wide(wide_dim=X.size(1), output_dim=1)
>>> wide(X)
tensor([[-0.8841],
[-0.8633],
[-1.2713],
[-0.4762]], grad_fn=<AddmmBackward>)
"""
def __init__(self,wide_dim:int, output_dim:int=1):
super(Wide, self).__init__()
self.wide_linear = nn.Linear(wide_dim, output_dim)
......
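In practice wide_dim is the number of columns of the one-hot encoded matrix produced by the WidePreprocessor; a sketch, assuming the X_wide matrix built in the scripts above:

X_wide = prepare_wide.fit_transform(df)              # one-hot encoded wide matrix
wide = Wide(wide_dim=X_wide.shape[1], output_dim=1)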
from ._preprocessors import WidePreprocessor
from ._preprocessors import DeepPreprocessor
from ._preprocessors import TextPreprocessor
from ._preprocessors import ImagePreprocessor
from ._preprocessors import ImagePreprocessor
\ No newline at end of file
......@@ -10,9 +10,9 @@ from scipy.sparse import csc_matrix
from tqdm import tqdm
from ..wdtypes import *
from .utils.dense_utils import *
from .utils.text_utils import *
from .utils.image_utils import *
from ..utils.dense_utils import *
from ..utils.text_utils import *
from ..utils.image_utils import *
class BasePreprocessor(object):
......@@ -184,7 +184,8 @@ class TextPreprocessor(BasePreprocessor):
if self.verbose:
print("The vocabulary contains {} words".format(len(self.vocab.stoi)))
if self.word_vectors_path is not None:
self.embedding_matrix = build_embeddings_matrix(self.vocab, self.word_vectors_path)
self.embedding_matrix = build_embeddings_matrix(self.vocab, self.word_vectors_path,
self.min_freq)
return padded_seq
def fit_transform(self, df:pd.DataFrame, text_col:str)->np.ndarray:
......
import numpy as np
import pandas as pd
from ...wdtypes import *
from ..wdtypes import *
pd.options.mode.chained_assignment = None
......
......@@ -8,7 +8,7 @@ way I avoid the numerous fastai dependencies.
Credit for the code here to Jeremy Howard and the fastai team
'''
from ...wdtypes import *
from ..wdtypes import *
import sys
import os
......
......@@ -11,7 +11,7 @@ import numpy as np
import imutils
import cv2
from ...wdtypes import *
from ..wdtypes import *
class AspectAwarePreprocessor:
......
......@@ -4,7 +4,7 @@ import html
import os
import re
from ...wdtypes import *
from ..wdtypes import *
from .fastai_transforms import Tokenizer, Vocab
from gensim.utils import tokenize
......@@ -35,7 +35,8 @@ def pad_sequences(seq:List[int], maxlen:int, pad_first:bool=True, pad_idx:int=1)
return res
def build_embeddings_matrix(vocab:Vocab, word_vectors_path:str, verbose:int=1) -> np.ndarray:
def build_embeddings_matrix(vocab:Vocab, word_vectors_path:str, min_freq:int,
verbose:int=1) -> np.ndarray:
if not os.path.isfile(word_vectors_path):
raise FileNotFoundError("{} not found".format(word_vectors_path))
......@@ -68,7 +69,8 @@ def build_embeddings_matrix(vocab:Vocab, word_vectors_path:str, verbose:int=1) -
embedding_matrix[i] = mean_word_vector
if verbose:
print('{} words in the vocabulary had {} vectors and appear more than the min frequency'.format(found_words, word_vectors_path))
print('{} words in the vocabulary had {} vectors and appear more than {} times'.format(
found_words, word_vectors_path, min_freq))
return embedding_matrix
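build_embeddings_matrix now also receives min_freq, so the verbose message can report how often the retained words appear. A usage sketch matching the new signature; the word-vectors path is hypothetical:

embedding_matrix = build_embeddings_matrix(
    vocab=text_processor.vocab,              # Vocab built by the TextPreprocessor
    word_vectors_path='glove.6B.100d.txt',   # hypothetical path to pretrained word vectors
    min_freq=5,                              # same min_freq used to build the vocabulary
)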
import sys
import scipy
from torch.nn import Module
from torch import Tensor
......@@ -10,9 +9,7 @@ from torch.optim.lr_scheduler import _LRScheduler
from pathlib import PosixPath
from typing import (List, Any, Union, Dict, Callable, Optional, Tuple,
Generator, Collection, Iterable)
sparse_matrix = Union[scipy.sparse.csr.csr_matrix]
from scipy.sparse.csr import csr_matrix as sparse_matrix
SimpleNamespace = type(sys.implementation)
ListRules = Collection[Callable[[str],str]]
......