Commit fc4aed48 authored by J jrzaurin

Added __getstate__ and __setstate__ methods to the EarlyStopping and ModelCheckpoint...

Added __getstate__ and __setstate__ methods to the EarlyStopping and ModelCheckpoint callbacks. Added the option of using GRUs in the deeptext component, and of predicting from either the final hidden state or the RNN output. Fixed a small bug in the text processor. Improved the save method in the Trainer.
Parent 8727ce66
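As a quick illustration of the first change (not part of the diff): with ``__getstate__``/``__setstate__`` defined, the two callbacks can be pickled even after a Trainer has attached ``trainer``/``model`` references to them. A minimal sketch, assuming the usual ``pytorch_widedeep.callbacks`` import path:

```python
import pickle

from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint

early_stopping = EarlyStopping(patience=5)
model_checkpoint = ModelCheckpoint(filepath="model_weights/wd_out", save_best_only=True)

# __getstate__ drops the (non-picklable) 'trainer' and 'model' references,
# so a round trip through pickle keeps only the callback's own configuration
restored = pickle.loads(pickle.dumps(early_stopping))
assert restored.patience == 5
```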
......@@ -84,14 +84,16 @@ if __name__ == "__main__":
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=2)
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)
model_checkpoint = ModelCheckpoint(
filepath="model_weights/wd_out",
save_best_only=True,
max_save=1,
)
early_stopping = EarlyStopping(patience=5)
optimizers = {"wide": wide_opt, "deeptabular": deep_opt}
schedulers = {"wide": wide_sch, "deeptabular": deep_sch}
initializers = {"wide": KaimingNormal, "deeptabular": XavierNormal}
callbacks = [
LRHistory(n_epochs=10),
EarlyStopping(patience=5),
ModelCheckpoint(filepath="model_weights/wd_out"),
]
callbacks = [early_stopping, model_checkpoint, LRHistory(n_epochs=10)]
metrics = [Accuracy, Precision]
trainer = Trainer(
......@@ -108,16 +110,9 @@ if __name__ == "__main__":
X_wide=X_wide,
X_tab=X_tab,
target=target,
n_epochs=10,
n_epochs=2,
batch_size=64,
val_split=0.2,
)
# # to save/load the model
# trainer.save_model("model_weights/model.t")
# # ... days after
# model = Trainer.load_model("model_weights/model.t")
# # or via state dictionaries
# trainer.save_model_state_dict("model_weights/model_dict.t")
# # ... days after, with an instantiated class of Trainer
# trainer.load_model_state_dict("model_weights/model_dict.t")
trainer.save("widedeep")
......@@ -466,6 +466,14 @@ class ModelCheckpoint(Callback):
self.old_files = self.old_files[1:]
self.old_files.append(filepath)
def __getstate__(self):
d = self.__dict__
self_dict = {k: d[k] for k in d if k not in ["trainer", "model"]}
return self_dict
def __setstate__(self, state):
self.__dict__ = state
class EarlyStopping(Callback):
def __init__(
......@@ -608,3 +616,11 @@ class EarlyStopping(Callback):
RuntimeWarning,
)
return monitor_value
def __getstate__(self):
d = self.__dict__
self_dict = {k: d[k] for k in d if k not in ["trainer", "model"]}
return self_dict
def __setstate__(self, state):
self.__dict__ = state
......@@ -12,10 +12,12 @@ class DeepText(nn.Module):
def __init__(
self,
vocab_size: int,
rnn_type: str = "lstm",
hidden_dim: int = 64,
n_layers: int = 3,
rnn_dropout: float = 0.1,
bidirectional: bool = False,
use_hidden_state: bool = True,
padding_idx: int = 1,
embed_dim: Optional[int] = None,
embed_matrix: Optional[np.ndarray] = None,
......@@ -37,6 +39,8 @@ class DeepText(nn.Module):
----------
vocab_size: int
number of words in the vocabulary
rnn_type: str
String indicating the type of RNN to use. One of "lstm" or "rnn"
hidden_dim: int, default = 64
Hidden dim of the RNN
n_layers: int, default = 3
......@@ -46,6 +50,9 @@ class DeepText(nn.Module):
the last layer
bidirectional: bool, default = False
indicates whether the stacked RNNs are bidirectional
use_hidden_state: bool, default = True
Boolean indicating whether to use the final hidden state (True) or the
RNN output (False) as the predicting features
padding_idx: int, default = 1
index of the padding token in the padded-tokenised sequences. I
use the ``fastai`` tokenizer where the token index 0 is reserved
......@@ -112,11 +119,18 @@ class DeepText(nn.Module):
UserWarning,
)
if rnn_type.lower() not in ["lstm", "gru"]:
raise ValueError(
f"'rnn_type' must be 'lstm' or 'gru', got {rnn_type} instead"
)
self.vocab_size = vocab_size
self.rnn_type = rnn_type
self.hidden_dim = hidden_dim
self.n_layers = n_layers
self.rnn_dropout = rnn_dropout
self.bidirectional = bidirectional
self.use_hidden_state = use_hidden_state
self.padding_idx = padding_idx
self.embed_dim = embed_dim
self.embed_trainable = embed_trainable
......@@ -152,14 +166,18 @@ class DeepText(nn.Module):
)
# stack of RNNs (LSTMs or GRUs)
self.rnn = nn.LSTM(
embed_dim,
hidden_dim,
num_layers=n_layers,
bidirectional=bidirectional,
dropout=rnn_dropout,
batch_first=True,
)
rnn_params = {
"input_size": embed_dim,
"hidden_size": hidden_dim,
"num_layers": n_layers,
"bidirectional": bidirectional,
"dropout": rnn_dropout,
"batch_first": True,
}
if self.rnn_type.lower() == "lstm":
self.rnn: Union[nn.LSTM, nn.GRU] = nn.LSTM(**rnn_params)
elif self.rnn_type.lower() == "gru":
self.rnn = nn.GRU(**rnn_params)
# the output_dim attribute will be used as input_dim when "merging" the models
self.output_dim = hidden_dim * 2 if bidirectional else hidden_dim
......@@ -186,13 +204,23 @@ class DeepText(nn.Module):
classifier/regressor with an optional `'Fully Connected head'`
"""
embed = self.word_embed(X.long())
o, (h, c) = self.rnn(embed)
if self.rnn_type.lower() == "lstm":
o, (h, c) = self.rnn(embed)
elif self.rnn_type.lower() == "gru":
o, h = self.rnn(embed)
o = o.permute(1, 0, 2)
if self.bidirectional:
last_h = torch.cat((h[-2], h[-1]), dim=1)
rnn_out = (
torch.cat((h[-2], h[-1]), dim=1) if self.use_hidden_state else o[-1]
)
else:
last_h = h[-1]
rnn_out = h[-1] if self.use_hidden_state else o[-1]
if self.head_hidden_dims is not None:
out = self.texthead(last_h)
return out
head_out = self.texthead(rnn_out)
return head_out
else:
return last_h
return rnn_out
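To show the new ``rnn_type`` and ``use_hidden_state`` options together, a small sketch (vocabulary size, dimensions and the random input are made up for illustration):

```python
import torch

from pytorch_widedeep.models import DeepText

# GRU-based deeptext component that predicts from the last RNN output
# rather than from the final hidden state
deeptext = DeepText(
    vocab_size=100,
    rnn_type="gru",
    hidden_dim=32,
    n_layers=2,
    bidirectional=True,
    use_hidden_state=False,
    embed_dim=16,
)

X_text = torch.randint(0, 100, (8, 20))  # (batch_size, seq_len) of token ids
out = deeptext(X_text)
print(out.shape)  # torch.Size([8, 64]) -> hidden_dim * 2 because bidirectional
```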
......@@ -135,6 +135,13 @@ class TabPreprocessor(BasePreprocessor):
raise ValueError(tabtransformer_error_message)
if self.for_tabtransformer and isinstance(self.embed_cols[0], tuple): # type: ignore[index]
raise ValueError(tabtransformer_error_message)
if self.for_tabtransformer and self.scale:
warnings.warn(
"Both 'for_tabtransformer' and 'scale' are set to True. "
"This implies that the continuous columns will be "
"standarized and then passed through a LayerNorm layer",
UserWarning,
)
def fit(self, df: pd.DataFrame) -> BasePreprocessor:
"""Fits the Preprocessor and creates required attributes"""
......
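The new warning documents that, with both flags on, the continuous columns are standardized by the preprocessor and then passed through a LayerNorm layer in the model. A hedged sketch of the combination that should trigger it, assuming ``embed_cols``, ``continuous_cols``, ``scale`` and ``for_tabtransformer`` are constructor arguments (only the corresponding ``self.`` attributes appear in the diff):

```python
import pandas as pd

from pytorch_widedeep.preprocessing import TabPreprocessor

df = pd.DataFrame({"color": ["r", "g", "b"], "age": [25, 40, 61]})

tab_preprocessor = TabPreprocessor(
    embed_cols=["color"],     # plain column names (tuples are rejected for the TabTransformer)
    continuous_cols=["age"],
    scale=True,               # standardize the continuous columns
    for_tabtransformer=True,  # together with scale=True this emits the new UserWarning
)
X_tab = tab_preprocessor.fit_transform(df)
```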
......@@ -21,6 +21,8 @@ class TextPreprocessor(BasePreprocessor):
max_vocab: int = 30000,
min_freq: int = 5,
maxlen: int = 80,
pad_first: bool = True,
pad_idx: int = 1,
word_vectors_path: Optional[str] = None,
verbose: int = 1,
):
......@@ -36,6 +38,11 @@ class TextPreprocessor(BasePreprocessor):
Minimum frequency for a token to be part of the vocabulary
maxlen: int, default=80
Maximum length of the tokenized sequences
pad_first: bool, default = True
Indicates whether the padding index will be added at the beginning (True)
or at the end (False) of the sequences
pad_idx: int, default = 1
padding index. The fastai tokenizer reserves index 0 for the 'unknown' token.
word_vectors_path: str, Optional
Path to the pretrained word vectors
verbose: int, default 1
......@@ -71,6 +78,8 @@ class TextPreprocessor(BasePreprocessor):
self.max_vocab = max_vocab
self.min_freq = min_freq
self.maxlen = maxlen
self.pad_first = pad_first
self.pad_idx = pad_idx
self.word_vectors_path = word_vectors_path
self.verbose = verbose
......@@ -83,6 +92,10 @@ class TextPreprocessor(BasePreprocessor):
)
if self.verbose:
print("The vocabulary contains {} tokens".format(len(self.vocab.stoi)))
if self.word_vectors_path is not None:
self.embedding_matrix = build_embeddings_matrix(
self.vocab, self.word_vectors_path, self.min_freq
)
return self
def transform(self, df: pd.DataFrame) -> np.ndarray:
......@@ -91,11 +104,17 @@ class TextPreprocessor(BasePreprocessor):
texts = df[self.text_col].tolist()
self.tokens = get_texts(texts)
sequences = [self.vocab.numericalize(t) for t in self.tokens]
padded_seq = np.array([pad_sequences(s, maxlen=self.maxlen) for s in sequences])
if self.word_vectors_path is not None:
self.embedding_matrix = build_embeddings_matrix(
self.vocab, self.word_vectors_path, self.min_freq
)
padded_seq = np.array(
[
pad_sequences(
s,
maxlen=self.maxlen,
pad_first=self.pad_first,
pad_idx=self.pad_idx,
)
for s in sequences
]
)
return padded_seq
def fit_transform(self, df: pd.DataFrame) -> np.ndarray:
......
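A short sketch of the preprocessor with the new padding options, assuming ``text_col`` is the first constructor argument (the diff only shows the signature from ``max_vocab`` onwards):

```python
import pandas as pd

from pytorch_widedeep.preprocessing import TextPreprocessor

df = pd.DataFrame(
    {"review": ["the food was great", "terrible service, will not come back"]}
)

text_preprocessor = TextPreprocessor(
    text_col="review",
    max_vocab=100,
    min_freq=1,
    maxlen=10,
    pad_first=True,  # put the padding tokens at the beginning of each sequence
    pad_idx=1,       # index 0 is kept by the fastai tokenizer for 'unknown'
)
X_text = text_preprocessor.fit_transform(df)
print(X_text.shape)  # (2, 10)
```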
import os
import json
from pathlib import Path
import numpy as np
import torch
......@@ -275,10 +276,13 @@ class Trainer:
self.verbose = verbose
self.seed = seed
self.early_stop = False
self.objective = objective
self.method = _ObjectiveToMethod.get(objective)
# initialize early_stop. If EarlyStopping Callback is used it will
# take care of it
self.early_stop = False
self.loss_fn = self._set_loss_fn(
objective, class_weight, custom_loss_function, alpha, gamma
)
......@@ -817,10 +821,10 @@ class Trainer:
path: str,
save_state_dict: bool = False,
model_filename: str = "wd_model.pt",
feat_imp_filename: str = "feature_importance.json",
):
"""Saves the model and the feature_importance attribute (if the
``deeptabular`` component is a Tabnet model) to disk
"""Saves the model, training and evaluation history, and the
feature_importance attribute (if the ``deeptabular`` component is a
Tabnet model) to disk
The ``Trainer`` class is built so that it 'just' trains a model. With
that in mind, all the torch related parameters (such as optimizers,
......@@ -847,23 +851,31 @@ class Trainer:
model's state dictionary
model_filename: str, Optional, default = "wd_model.pt"
filename where the model weights will be stored
feat_imp_filename: str, Optional, default = "feature_importance.json"
filename where the feature importances will be stored
"""
# TO DO: ask for advice on the saving strategy
if not os.path.exists(path):
os.makedirs(path)
save_dir = Path(path)
history_dir = save_dir / "history"
history_dir.mkdir(exist_ok=True, parents=True)
# the trainer is run with the History Callback by default
with open(history_dir / "train_eval_history.json", "w") as teh:
json.dump(self.history, teh) # type: ignore[attr-defined]
has_lr_history = any(
[clbk.__class__.__name__ == "LRHistory" for clbk in self.callbacks]
)
if has_lr_history:
with open(history_dir / "lr_history.json", "w") as lrh:
json.dump(self.lr_history, lrh) # type: ignore[attr-defined]
model_path = "/".join([path, model_filename])
model_path = save_dir / model_filename
if save_state_dict:
torch.save(self.model.state_dict(), model_path)
else:
torch.save(self.model, model_path)
if self.model.is_tabnet:
feature_importance_fname = "/".join([path, feat_imp_filename])
with open(feature_importance_fname, "w") as fi:
with open(save_dir / "feature_importance.json", "w") as fi:
json.dump(self.feature_importance, fi)
def _restore_best_weights(self):
......@@ -894,7 +906,8 @@ class Trainer:
if self.verbose:
print(
"Model weights after training corresponds to the those of the "
"final epoch which might not be the best performing weights"
"final epoch which might not be the best performing weights. Use"
"the 'ModelCheckpoint' Callback to restore the best epoch weights."
)
def _finetune(
......