Commit f9b1e8cb
Authored on Dec 25, 2021 by jrzaurin
Parent: f7ebc2ee

re-arranged the code for the bayesian model and added docs

Showing 17 changed files with 1,324 additions and 208 deletions (+1324 / −208)
docs/bayesian_models.rst (+15 / −0)
docs/index.rst (+1 / −0)
examples/scripts/adult_census_bayesian_tabmlp.py (+127 / −0)
pytorch_widedeep/bayesian_models/__init__.py (+1 / −0)
pytorch_widedeep/bayesian_models/_base_bayesian_model.py (+15 / −6)
pytorch_widedeep/bayesian_models/_weight_sampler.py (+14 / −0)
pytorch_widedeep/bayesian_models/bayesian_nn/__init__.py (+1 / −0)
pytorch_widedeep/bayesian_models/bayesian_nn/modules/__init__.py (+2 / −0)
pytorch_widedeep/bayesian_models/bayesian_nn/modules/bayesian_embedding.py (+181 / −0)
pytorch_widedeep/bayesian_models/bayesian_nn/modules/bayesian_linear.py (+65 / −12)
pytorch_widedeep/bayesian_models/tabular/bayesian_embeddings_layers.py (+24 / −159)
pytorch_widedeep/bayesian_models/tabular/bayesian_linear/bayesian_wide.py (+67 / −12)
pytorch_widedeep/bayesian_models/tabular/bayesian_mlp/_layers.py (+2 / −2)
pytorch_widedeep/bayesian_models/tabular/bayesian_mlp/bayesian_tab_mlp.py (+93 / −8)
pytorch_widedeep/losses.py (+10 / −2)
pytorch_widedeep/training/_trainer_utils.py (+105 / −7)
pytorch_widedeep/training/bayesian_trainer.py (+601 / −0)
docs/bayesian_models.rst (new file, mode 100644)

The ``models`` module
======================

This module contains the two Bayesian models available in this library, namely
the Bayesian versions of the Wide and TabMlp models, referred to as
``BayesianWide`` and ``BayesianTabMlp``.

.. autoclass:: pytorch_widedeep.bayesian_models.tabular.bayesian_linear.bayesian_wide.BayesianWide
    :exclude-members: forward
    :members:

.. autoclass:: pytorch_widedeep.bayesian_models.tabular.bayesian_mlp.bayesian_tab_mlp.BayesianTabMlp
    :exclude-members: forward
    :members:
docs/index.rst

@@ -18,6 +18,7 @@ Documentation
    Utilities <utils/index>
    Preprocessing <preprocessing>
    Model Components <model_components>
+   Bayesian Models <bayesian_models>
    Metrics <metrics>
    Losses <losses>
    Dataloaders <dataloaders>
examples/scripts/adult_census_bayesian_tabmlp.py (new file, mode 100644)

from pathlib import Path

import numpy as np
import torch
import pandas as pd

from pytorch_widedeep.metrics import Accuracy
from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_widedeep.preprocessing import TabPreprocessor, WidePreprocessor
from pytorch_widedeep.bayesian_models import BayesianWide, BayesianTabMlp
from pytorch_widedeep.training.bayesian_trainer import BayesianTrainer

use_cuda = torch.cuda.is_available()

if __name__ == "__main__":

    DATA_PATH = Path("../tmp_data")

    df = pd.read_csv(DATA_PATH / "adult/adult.csv.zip")
    df.columns = [c.replace("-", "_") for c in df.columns]
    df["age_buckets"] = pd.cut(
        df.age, bins=[16, 25, 30, 35, 40, 45, 50, 55, 60, 91], labels=np.arange(9)
    )
    df["income_label"] = (df["income"].apply(lambda x: ">50K" in x)).astype(int)
    df.drop("income", axis=1, inplace=True)
    df.head()

    for model_name in ["linear", "mlp"]:
        for objective in ["binary", "multiclass", "regression"]:

            cat_cols = [
                "workclass",
                "education",
                "marital_status",
                "occupation",
                "relationship",
                "native_country",
                "race",
                "gender",
            ]

            if model_name == "linear":
                crossed_cols = [
                    ("education", "occupation"),
                    ("native_country", "occupation"),
                ]

            if objective == "binary":
                continuous_cols = ["age", "hours_per_week"]
                target_name = "income_label"
                target = df[target_name].values
            elif objective == "multiclass":
                continuous_cols = ["hours_per_week"]
                target_name = "age_buckets"
                target = np.array(df[target_name].tolist())
            elif objective == "regression":
                continuous_cols = ["hours_per_week"]
                target_name = "age"
                target = df[target_name].values

            if model_name == "linear":
                prepare_wide = WidePreprocessor(
                    wide_cols=cat_cols, crossed_cols=crossed_cols
                )
                X_tab = prepare_wide.fit_transform(df)
                model = BayesianWide(
                    input_dim=np.unique(X_tab).shape[0],
                    pred_dim=df["age_buckets"].nunique()
                    if objective == "multiclass"
                    else 1,
                    prior_sigma_1=1.0,
                    prior_sigma_2=0.002,
                    prior_pi=0.8,
                    posterior_mu_init=0,
                    posterior_rho_init=-7.0,
                )

            if model_name == "mlp":
                prepare_tab = TabPreprocessor(
                    embed_cols=cat_cols, continuous_cols=continuous_cols  # type: ignore[arg-type]
                )
                X_tab = prepare_tab.fit_transform(df)
                model = BayesianTabMlp(  # type: ignore[assignment]
                    column_idx=prepare_tab.column_idx,
                    cat_embed_input=prepare_tab.embeddings_input,
                    continuous_cols=continuous_cols,
                    # embed_continuous=True,
                    mlp_hidden_dims=[128, 64],
                    prior_sigma_1=1.0,
                    prior_sigma_2=0.002,
                    prior_pi=0.8,
                    posterior_mu_init=0,
                    posterior_rho_init=-7.0,
                    pred_dim=df["age_buckets"].nunique()
                    if objective == "multiclass"
                    else 1,
                )

            model_checkpoint = ModelCheckpoint(
                filepath="model_weights/wd_out",
                save_best_only=True,
                max_save=1,
            )
            early_stopping = EarlyStopping(patience=2)
            callbacks = [early_stopping, model_checkpoint]
            metrics = [Accuracy] if objective != "regression" else None

            trainer = BayesianTrainer(
                model,
                objective=objective,
                optimizer=torch.optim.Adam(model.parameters(), lr=0.01),
                callbacks=callbacks,
                metrics=metrics,
            )

            trainer.fit(
                X_tab=X_tab,
                target=target,
                val_split=0.2,
                n_epochs=1,
                batch_size=256,
            )

            # simply to check predict functions as expected
            preds = trainer.predict(X_tab=X_tab)
pytorch_widedeep/bayesian_models/__init__.py

from pytorch_widedeep.bayesian_models import bayesian_nn
from pytorch_widedeep.bayesian_models.tabular import (
    BayesianWide,
    BayesianTabMlp,
...
pytorch_widedeep/bayesian_models/_base_bayesian_model.py

import torch
from torch import nn

from pytorch_widedeep.wdtypes import *  # noqa: F403


class BayesianModule(nn.Module):
    r"""Simply a 'hack' to facilitate the computation of the KL divergence for
    all Bayesian models
    """

    def __init__(self):
        super().__init__()


class BaseBayesianModel(nn.Module):
    r"""Base model containing the two methods common to all Bayesian models"""

    def _kl_divergence(self):
        kld = 0
        for module in self.modules():
...

@@ -23,13 +30,15 @@ class BaseBayesianModel(nn.Module):
        loss_fn: nn.Module,
        n_samples: int,
        n_batches: int,
        pred_dim: int,
-    ) -> Tensor:
-        outputs = torch.zeros(n_samples, target.shape[0], pred_dim)
+    ) -> Tuple[Tensor, Tensor]:
+        outputs_l = []
        kld = 0.0
-        for i in range(n_samples):
-            outputs[i] = self(input)
+        for _ in range(n_samples):
+            outputs_l.append(self(input))
            kld += self._kl_divergence()
+        outputs = torch.stack(outputs_l)
        complexity_cost = kld / n_batches
        likelihood_cost = loss_fn(outputs.mean(0), target)
-        return complexity_cost + likelihood_cost
+        return outputs, complexity_cost + likelihood_cost
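For context (commentary, not part of the diff): the training loss assembled in this hunk is a Monte Carlo estimate of the negative ELBO from Weight Uncertainty in Neural Networks (Blundell et al., 2015), with two practical choices visible above — the KL term is accumulated over the ``n_samples`` draws and scaled by ``1 / n_batches``, and the likelihood term is evaluated on the mean of the sampled predictions:

$$\mathcal{L}(\theta) = \underbrace{\frac{1}{\text{n\_batches}}\sum_{s=1}^{S}\Big(\log q\big(\mathbf{w}^{(s)}\mid\theta\big)-\log P\big(\mathbf{w}^{(s)}\big)\Big)}_{\texttt{complexity\_cost}} \;+\; \underbrace{\mathrm{NLL}\Big(\tfrac{1}{S}\textstyle\sum_{s=1}^{S} f\big(\mathbf{x};\mathbf{w}^{(s)}\big),\; \mathbf{y}\Big)}_{\texttt{likelihood\_cost}}$$

where $S$ = ``n_samples`` and ``_kl_divergence`` presumably aggregates each submodule's ``log_variational_posterior - log_prior``, the attributes that the Bayesian layers set during their forward passes (its body is collapsed in this view).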
pytorch_widedeep/bayesian_models/_weight_sampler.py

"""
The code here is greatly inspired by the code at the Blitz package:
https://github.com/piEsposito/blitz-bayesian-deep-learning
"""
import math

from pytorch_widedeep.wdtypes import *  # noqa: F403


class ScaleMixtureGaussianPrior(object):
    r"""Defines the Scale Mixture Prior as proposed in Weight Uncertainty in
    Neural Networks (Eq 7 in the original publication)
    """

    def __init__(self, pi: float, sigma1: float, sigma2: float):
        super().__init__()
        self.pi = pi
...

@@ -19,6 +29,10 @@ class ScaleMixtureGaussianPrior(object):

class GaussianPosterior(object):
    r"""Defines the Gaussian variational posterior as proposed in Weight
    Uncertainty in Neural Networks
    """

    def __init__(self, param_mu: Tensor, param_rho: Tensor):
        super().__init__()
        self.param_mu = param_mu
...
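Both classes implement the two densities of Bayes by Backprop: a scale-mixture prior $P(\mathbf{w}) = \pi\,\mathcal{N}(\mathbf{w};0,\sigma_1^2) + (1-\pi)\,\mathcal{N}(\mathbf{w};0,\sigma_2^2)$ and a diagonal Gaussian posterior sampled via the reparameterization $\mathbf{w} = \mu + \log(1+e^{\rho})\odot\epsilon$, $\epsilon\sim\mathcal{N}(0,I)$. The method bodies are collapsed in this view; below is a minimal sketch of what ``sample``/``log_posterior`` typically compute in such implementations (an assumption for illustration, not code taken from the diff):

import math
import torch

def sample(mu: torch.Tensor, rho: torch.Tensor) -> torch.Tensor:
    # Reparameterization trick: sigma = softplus(rho) keeps the scale
    # positive while rho itself is unconstrained.
    eps = torch.randn_like(mu)
    return mu + torch.log1p(torch.exp(rho)) * eps

def log_gaussian(x: torch.Tensor, mu: torch.Tensor, sigma: torch.Tensor) -> torch.Tensor:
    # Sum of element-wise Gaussian log-densities (e.g. the log-posterior of a draw).
    return (
        -0.5 * math.log(2 * math.pi)
        - torch.log(sigma)
        - (x - mu) ** 2 / (2 * sigma**2)
    ).sum()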
pytorch_widedeep/bayesian_models/bayesian_nn/__init__.py (new file, mode 100644)

from .modules import *  # noqa: F401, F403
pytorch_widedeep/bayesian_models/bayesian_nn/modules/__init__.py (new file, mode 100644)

from .bayesian_linear import BayesianLinear
from .bayesian_embedding import BayesianEmbedding
pytorch_widedeep/bayesian_models/bayesian_nn/modules/bayesian_embedding.py (new file, mode 100644)

"""
The code here is greatly inspired by the code at the Blitz package:
https://github.com/piEsposito/blitz-bayesian-deep-learning
"""
import torch
import torch.nn.functional as F
from torch import nn

from pytorch_widedeep.wdtypes import *  # noqa: F403
from pytorch_widedeep.bayesian_models._weight_sampler import (
    GaussianPosterior,
    ScaleMixtureGaussianPrior,
)
from pytorch_widedeep.bayesian_models._base_bayesian_model import (
    BayesianModule,
)


class BayesianEmbedding(BayesianModule):
    r"""A simple lookup table that looks up embeddings of a fixed dictionary
    and size.

    Parameters
    ----------
    n_embed: int
        number of embeddings. Typically referred to as the size of the vocabulary
    embed_dim: int
        Dimension of the embeddings
    padding_idx: int, optional, default = None
        If specified, the entries at ``padding_idx`` do not contribute to the
        gradient; therefore, the embedding vector at ``padding_idx`` is not
        updated during training, i.e. it remains as a fixed "pad". For a
        newly constructed Embedding, the embedding vector at ``padding_idx``
        will default to all zeros, but can be updated to another value to be
        used as the padding vector
    max_norm: float, optional, default = None
        If given, each embedding vector with norm larger than ``max_norm`` is
        renormalized to have norm ``max_norm``
    norm_type: float, optional, default = 2.
        The p of the p-norm to compute for the ``max_norm`` option.
    scale_grad_by_freq: bool, optional, default = False
        If given, this will scale gradients by the inverse of frequency of the
        words in the mini-batch.
    sparse: bool, optional, default = False
        If True, the gradient w.r.t. the weight matrix will be a sparse tensor.
        See Notes for more details regarding sparse gradients.
    prior_sigma_1: float, default = 1.0
        Prior of the sigma parameter for the first of the two weight Gaussian
        distributions that will be mixed to produce the prior weight
        distribution
    prior_sigma_2: float, default = 0.002
        Prior of the sigma parameter for the second of the two weight Gaussian
        distributions that will be mixed to produce the prior weight
        distribution
    prior_pi: float, default = 0.8
        Scaling factor that will be used to mix the Gaussians to produce the
        prior weight distribution
    posterior_mu_init: float = 0.0,
        The posterior sample of the weights is defined as:
        :math:`\mathbf{w} = \mu + \log(1 + \exp(\rho))`
        where :math:`\mu` and :math:`\rho` are both sampled from Gaussian
        distributions. ``posterior_mu_init`` is the initial mean value for
        the Gaussian distribution from which :math:`\mu` is sampled.
    posterior_rho_init: float = -7.0,
        The initial mean value for the Gaussian distribution from which
        :math:`\rho` is sampled.

    Examples
    --------
    >>> import torch
    >>> from pytorch_widedeep.bayesian_models import bayesian_nn as bnn
    >>> embedding = bnn.BayesianEmbedding(10, 3)
    >>> input = torch.LongTensor([[1, 2, 4, 5], [4, 3, 2, 9]])
    >>> out = embedding(input)
    """

    def __init__(
        self,
        n_embed: int,
        embed_dim: int,
        padding_idx: Optional[int] = None,
        max_norm: Optional[float] = None,
        norm_type: Optional[float] = 2.0,
        scale_grad_by_freq: Optional[bool] = False,
        sparse: Optional[bool] = False,
        prior_sigma_1: float = 1.0,
        prior_sigma_2: float = 0.002,
        prior_pi: float = 0.8,
        posterior_mu_init: float = 0.0,
        posterior_rho_init: float = -7.0,
    ):
        super(BayesianEmbedding, self).__init__()

        self.n_embed = n_embed
        self.embed_dim = embed_dim
        self.padding_idx = padding_idx
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        self.sparse = sparse

        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi
        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init

        # Variational weight parameters and sample
        self.weight_mu = nn.Parameter(
            torch.Tensor(n_embed, embed_dim).normal_(posterior_mu_init, 0.1)
        )
        self.weight_rho = nn.Parameter(
            torch.Tensor(n_embed, embed_dim).normal_(posterior_rho_init, 0.1)
        )
        self.weight_sampler = GaussianPosterior(self.weight_mu, self.weight_rho)

        # Prior
        self.weight_prior_dist = ScaleMixtureGaussianPrior(
            self.prior_pi,
            self.prior_sigma_1,
            self.prior_sigma_2,
        )

        self.log_prior: Union[Tensor, float] = 0.0
        self.log_variational_posterior: Union[Tensor, float] = 0.0

    def forward(self, X: Tensor) -> Tensor:
        if not self.training:
            return F.embedding(
                X,
                self.weight_mu,
                self.padding_idx,
                self.max_norm,
                self.norm_type,
                self.scale_grad_by_freq,
                self.sparse,
            )

        weight = self.weight_sampler.sample()

        self.log_variational_posterior = self.weight_sampler.log_posterior(weight)
        self.log_prior = self.weight_prior_dist.log_prior(weight)

        return F.embedding(
            X,
            weight,
            self.padding_idx,
            self.max_norm,
            self.norm_type,
            self.scale_grad_by_freq,
            self.sparse,
        )

    def extra_repr(self) -> str:  # noqa: C901
        s = "{n_embed}, {embed_dim}"
        if self.padding_idx is not None:
            s += ", padding_idx={padding_idx}"
        if self.max_norm is not None:
            s += ", max_norm={max_norm}"
        if self.norm_type != 2:
            s += ", norm_type={norm_type}"
        if self.scale_grad_by_freq is not False:
            s += ", scale_grad_by_freq={scale_grad_by_freq}"
        if self.sparse is not False:
            s += ", sparse=True"
        if self.prior_sigma_1 != 1.0:
            s += ", prior_sigma_1={prior_sigma_1}"
        if self.prior_sigma_2 != 0.002:
            s += ", prior_sigma_2={prior_sigma_2}"
        if self.prior_pi != 0.8:
            s += ", prior_pi={prior_pi}"
        if self.posterior_mu_init != 0.0:
            s += ", posterior_mu_init={posterior_mu_init}"
        if self.posterior_rho_init != -7.0:
            s += ", posterior_rho_init={posterior_rho_init}"
        return s.format(**self.__dict__)
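A quick way to see the train/eval split in ``forward`` — sampled weights during training, posterior means (``weight_mu``) at inference — is the following usage sketch (not part of the diff):

import torch
from pytorch_widedeep.bayesian_models import bayesian_nn as bnn

emb = bnn.BayesianEmbedding(10, 3)
idx = torch.LongTensor([[1, 2, 4, 5]])

emb.train()
draws = [emb(idx) for _ in range(5)]       # a fresh weight sample per call
assert not torch.allclose(draws[0], draws[1])  # differ with probability ~1

emb.eval()
out = emb(idx)                             # deterministic: uses weight_mu
assert torch.allclose(out, emb(idx))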
pytorch_widedeep/bayesian_models/bayesian_linear.py → pytorch_widedeep/bayesian_models/bayesian_nn/modules/bayesian_linear.py

"""
The code here is greatly inspired by a couple of sources:
the Blitz package: https://github.com/piEsposito/blitz-bayesian-deep-learning and
the Weight Uncertainty in Neural Networks post by Nitarshan Rajkumar:
https://www.nitarshan.com/bayes-by-backprop/ and references therein
"""
import torch.nn.functional as F
from torch import nn
...

@@ -12,16 +22,60 @@ from pytorch_widedeep.bayesian_models._base_bayesian_model import (

class BayesianLinear(BayesianModule):
    r"""Applies a linear transformation to the incoming data as proposed in
    Weight Uncertainty in Neural Networks

    Parameters
    ----------
    in_features: int
        size of each input sample
    out_features: int
        size of each output sample
    use_bias: bool, default = True
        Boolean indicating if an additive bias will be learnt
    prior_sigma_1: float, default = 1.0
        Prior of the sigma parameter for the first of the two weight Gaussian
        distributions that will be mixed to produce the prior weight
        distribution
    prior_sigma_2: float, default = 0.002
        Prior of the sigma parameter for the second of the two weight Gaussian
        distributions that will be mixed to produce the prior weight
        distribution
    prior_pi: float, default = 0.8
        Scaling factor that will be used to mix the Gaussians to produce the
        prior weight distribution
    posterior_mu_init: float = 0.0,
        The posterior sample of the weights is defined as:
        :math:`\mathbf{w} = \mu + \log(1 + \exp(\rho))`
        where :math:`\mu` and :math:`\rho` are both sampled from Gaussian
        distributions. ``posterior_mu_init`` is the initial mean value for
        the Gaussian distribution from which :math:`\mu` is sampled.
    posterior_rho_init: float = -7.0,
        The initial mean value for the Gaussian distribution from which
        :math:`\rho` is sampled.

    Examples
    --------
    >>> import torch
    >>> from pytorch_widedeep.bayesian_models import bayesian_nn as bnn
    >>> linear = bnn.BayesianLinear(10, 6)
    >>> input = torch.rand(6, 10)
    >>> out = linear(input)
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        use_bias: bool = True,
-        prior_sigma_1: float = 0.1,
+        prior_sigma_1: float = 1.0,
        prior_sigma_2: float = 0.002,
-        prior_pi: float = 1.0,
+        prior_pi: float = 0.8,
        posterior_mu_init: float = 0.0,
-        posterior_rho_init: float = -6.0,
+        posterior_rho_init: float = -7.0,
    ):
        super(BayesianLinear, self).__init__()
...

@@ -37,8 +91,7 @@ class BayesianLinear(BayesianModule):
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi

-        # Variational weight and bias parameters and sample for the posterior
-        # computation
+        # Variational Posterior
        self.weight_mu = nn.Parameter(
            torch.Tensor(out_features, in_features).normal_(posterior_mu_init, 0.1)
        )
...

@@ -103,13 +156,13 @@ class BayesianLinear(BayesianModule):
        if self.use_bias is not False:
            s += ", use_bias=True"
        if self.prior_sigma_1 != 0.1:
-            s + ", prior_sigma_1={prior_sigma_1}"
+            s += ", prior_sigma_1={prior_sigma_1}"
        if self.prior_sigma_2 != 0.002:
-            s + ", prior_sigma_2={prior_sigma_2}"
-        if self.prior_pi != 1.0:
-            s + ", prior_pi={prior_pi}"
+            s += ", prior_sigma_2={prior_sigma_2}"
+        if self.prior_pi != 0.8:
+            s += ", prior_pi={prior_pi}"
        if self.posterior_mu_init != 0.0:
-            s + ", posterior_mu_init={posterior_mu_init}"
-        if self.posterior_rho_init != -6.0:
-            s + ", posterior_rho_init={posterior_rho_init}"
+            s += ", posterior_mu_init={posterior_mu_init}"
+        if self.posterior_rho_init != -8.0:
+            s += ", posterior_rho_init={posterior_rho_init}"
        return s.format(**self.__dict__)
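Worth calling out: the ``extra_repr`` hunk fixes a silent no-op. A bare ``s + "..."`` builds a new string and throws it away; only the augmented assignment rebinds ``s``. A minimal repro (commentary, not part of the diff):

s = "10, 6"
s + ", use_bias=True"    # expression evaluated, result discarded; s unchanged
assert s == "10, 6"

s += ", use_bias=True"   # rebinds s to the concatenated string
assert s == "10, 6, use_bias=True"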
pytorch_widedeep/bayesian_models/bayesian_embeddings_layers.py → pytorch_widedeep/bayesian_models/tabular/bayesian_embeddings_layers.py

import numpy as np
import einops
import torch.nn.functional as F
from torch import nn

from pytorch_widedeep.wdtypes import *  # noqa: F403
from pytorch_widedeep.bayesian_models import bayesian_nn as bnn
from pytorch_widedeep.models._get_activation_fn import get_activation_fn
from pytorch_widedeep.bayesian_models._weight_sampler import (
    GaussianPosterior,
...

@@ -14,150 +14,6 @@ from pytorch_widedeep.bayesian_models._base_bayesian_model import (
)

(removed: the in-file BayesianEmbedding implementation below, now living in bayesian_nn/modules/bayesian_embedding.py)

class BayesianEmbedding(BayesianModule):
    def __init__(
        self,
        n_embed: int,
        embed_dim: int,
        padding_idx: Optional[int] = None,
        max_norm: Optional[float] = None,
        norm_type: Optional[float] = 2.0,
        scale_grad_by_freq: Optional[bool] = False,
        sparse: Optional[bool] = False,
        use_bias: bool = False,
        prior_sigma_1: float = 0.1,
        prior_sigma_2: float = 0.002,
        prior_pi: float = 1.0,
        posterior_mu_init: float = 0.0,
        posterior_rho_init: float = -6.0,
    ):
        super(BayesianEmbedding, self).__init__()

        self.n_embed = n_embed
        self.embed_dim = embed_dim
        self.padding_idx = padding_idx
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        self.sparse = sparse
        self.use_bias = use_bias

        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi
        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init

        # Variational weight parameters and sample
        self.weight_mu = nn.Parameter(
            torch.Tensor(n_embed, embed_dim).normal_(posterior_mu_init, 0.1)
        )
        self.weight_rho = nn.Parameter(
            torch.Tensor(n_embed, embed_dim).normal_(posterior_rho_init, 0.1)
        )
        self.weight_sampler = GaussianPosterior(self.weight_mu, self.weight_rho)

        if self.use_bias:
            self.bias_mu: Union[nn.Parameter, float] = nn.Parameter(
                torch.Tensor(n_embed).normal_(posterior_mu_init, 0.1)
            )
            self.bias_rho: Union[nn.Parameter, float] = nn.Parameter(
                torch.Tensor(n_embed).normal_(posterior_rho_init, 0.1)
            )
            self.bias_sampler = GaussianPosterior(self.bias_mu, self.bias_rho)
        else:
            self.bias_mu, self.bias_rho = 0.0, 0.0

        # Prior
        self.weight_prior_dist = ScaleMixtureGaussianPrior(
            self.prior_pi,
            self.prior_sigma_1,
            self.prior_sigma_2,
        )
        if self.use_bias:
            self.bias_prior_dist = ScaleMixtureGaussianPrior(
                self.prior_pi,
                self.prior_sigma_1,
                self.prior_sigma_2,
            )

        self.log_prior: Union[Tensor, float] = 0.0
        self.log_variational_posterior: Union[Tensor, float] = 0.0

    def forward(self, X: Tensor) -> Tensor:
        if not self.training:
            return (
                F.embedding(
                    X,
                    self.weight_mu,
                    self.padding_idx,
                    self.max_norm,
                    self.norm_type,
                    self.scale_grad_by_freq,
                    self.sparse,
                )
                + self.bias_mu
            )

        weight = self.weight_sampler.sample()
        if self.use_bias:
            bias = self.bias_sampler.sample()
            bias_log_posterior: Union[Tensor, float] = self.bias_sampler.log_posterior(bias)
            bias_log_prior: Union[Tensor, float] = self.bias_prior_dist.log_prior(bias)
        else:
            bias = None
            bias_log_posterior = 0.0
            bias_log_prior = 0.0

        self.log_variational_posterior = (
            self.weight_sampler.log_posterior(weight) + bias_log_posterior
        )
        self.log_prior = self.weight_prior_dist.log_prior(weight) + bias_log_prior

        return (
            F.embedding(
                X,
                weight,
                self.padding_idx,
                self.max_norm,
                self.norm_type,
                self.scale_grad_by_freq,
                self.sparse,
            )
            + bias
        )

    def extra_repr(self) -> str:  # noqa: C901
        s = "{n_embed}, {embed_dim}"
        if self.padding_idx is not None:
            s += ", padding_idx={padding_idx}"
        if self.max_norm is not None:
            s += ", max_norm={max_norm}"
        if self.norm_type != 2:
            s += ", norm_type={norm_type}"
        if self.scale_grad_by_freq is not False:
            s += ", scale_grad_by_freq={scale_grad_by_freq}"
        if self.sparse is not False:
            s += ", sparse=True"
        if self.use_bias:
            s += ", use_bias=True"
        if self.prior_sigma_1 != 0.1:
            s + ", prior_sigma_1={prior_sigma_1}"
        if self.prior_sigma_2 != 0.002:
            s + ", prior_sigma_2={prior_sigma_2}"
        if self.prior_pi != 1.0:
            s + ", prior_pi={prior_pi}"
        if self.posterior_mu_init != 0.0:
            s + ", posterior_mu_init={posterior_mu_init}"
        if self.posterior_rho_init != -6.0:
            s + ", posterior_rho_init={posterior_rho_init}"
        return s.format(**self.__dict__)


class BayesianContEmbeddings(BayesianModule):
    def __init__(
        self,
...

@@ -173,7 +29,10 @@ class BayesianContEmbeddings(BayesianModule):
    ):
        super(BayesianContEmbeddings, self).__init__()

        self.n_cont_cols = n_cont_cols
        self.embed_dim = embed_dim
        self.use_bias = use_bias
        self.activation = activation

        self.weight_mu = nn.Parameter(
            torch.Tensor(n_cont_cols, embed_dim).normal_(posterior_mu_init, 0.1)
...

@@ -246,7 +105,7 @@ class BayesianContEmbeddings(BayesianModule):
        return x

    def extra_repr(self) -> str:
-        s = "{n_cont_cols}, {embed_dim}, embed_dropout={embed_dropout}, use_bias={use_bias}"
+        s = "{n_cont_cols}, {embed_dim}, use_bias={use_bias}"
        if self.activation is not None:
            s += ", activation={activation}"
        return s.format(**self.__dict__)
...

@@ -272,7 +131,7 @@ class BayesianDiffSizeCatEmbeddings(nn.Module):
        self.embed_layers = nn.ModuleDict(
            {
-                "emb_layer_" + col: BayesianEmbedding(
+                "emb_layer_" + col: bnn.BayesianEmbedding(
                    val + 1,
                    dim,
                    padding_idx=0,
...

@@ -303,6 +162,7 @@ class BayesianDiffSizeCatAndContEmbeddings(nn.Module):
        column_idx: Dict[str, int],
        cat_embed_input: List[Tuple[str, int, int]],
        continuous_cols: Optional[List[str]],
+        embed_continuous: bool,
        cont_embed_dim: int,
        cont_embed_activation: str,
        use_cont_bias: bool,
...

@@ -317,6 +177,8 @@ class BayesianDiffSizeCatAndContEmbeddings(nn.Module):
        self.cat_embed_input = cat_embed_input
        self.continuous_cols = continuous_cols
+        self.embed_continuous = embed_continuous
+        self.cont_embed_dim = cont_embed_dim

        # Categorical
        if self.cat_embed_input is not None:
...

@@ -342,18 +204,21 @@ class BayesianDiffSizeCatAndContEmbeddings(nn.Module):
                self.cont_norm = nn.BatchNorm1d(len(continuous_cols))
            else:
                self.cont_norm = nn.Identity()
-            self.cont_embed = BayesianContEmbeddings(
-                len(continuous_cols),
-                cont_embed_dim,
-                prior_sigma_1,
-                prior_sigma_2,
-                prior_pi,
-                posterior_mu_init,
-                posterior_rho_init,
-                use_cont_bias,
-                cont_embed_activation,
-            )
-            self.cont_out_dim = len(continuous_cols) * cont_embed_dim
+            if self.embed_continuous:
+                self.cont_embed = BayesianContEmbeddings(
+                    len(continuous_cols),
+                    cont_embed_dim,
+                    prior_sigma_1,
+                    prior_sigma_2,
+                    prior_pi,
+                    posterior_mu_init,
+                    posterior_rho_init,
+                    use_cont_bias,
+                    cont_embed_activation,
+                )
+                self.cont_out_dim = len(continuous_cols) * cont_embed_dim
+            else:
+                self.cont_out_dim = len(continuous_cols)
        else:
            self.cont_out_dim = 0
...
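The new ``embed_continuous`` switch decides whether continuous columns are merely normalized (contributing ``len(continuous_cols)`` features) or each given a learned ``cont_embed_dim``-dimensional embedding, which explains the two ``cont_out_dim`` branches above. A shape-only sketch of the latter, under the usual "scale a per-column vector by the column value" construction (an assumption for illustration, not code from the diff):

import torch

batch, n_cont_cols, embed_dim = 32, 2, 16
x_cont = torch.rand(batch, n_cont_cols)             # normalized continuous inputs
W = torch.randn(n_cont_cols, embed_dim)             # per-column embedding vectors
cont_embed = x_cont.unsqueeze(-1) * W.unsqueeze(0)  # (batch, n_cont_cols, embed_dim)
out = cont_embed.flatten(1)                         # (batch, n_cont_cols * embed_dim)
assert out.shape == (batch, n_cont_cols * embed_dim)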
pytorch_widedeep/bayesian_models/tabular/bayesian_linear/bayesian_wide.py

from torch import nn

from pytorch_widedeep.wdtypes import *  # noqa: F403
+from pytorch_widedeep.bayesian_models import bayesian_nn as bnn
from pytorch_widedeep.bayesian_models._base_bayesian_model import (
    BaseBayesianModel,
)
-from pytorch_widedeep.bayesian_models.bayesian_embeddings_layers import (
-    BayesianEmbedding,
-)


class BayesianWide(BaseBayesianModel):
    r"""Creates a so-called Wide model. This is a linear model where the
    non-linearities are captured via crossed columns.

    The model is implemented via a Bayesian Embedding layer connected to the
    output neuron(s).

    Parameters
    ----------
    input_dim: int
        size of the Embedding layer. ``input_dim`` is the summation of all the
        individual values for all the features that go through the wide
        component. For example, if the wide component receives 2 features with
        5 individual values each, ``input_dim = 10``
    pred_dim: int
        size of the output tensor containing the predictions
    prior_sigma_1: float, default = 1.0
        Prior of the sigma parameter for the first of the two weight Gaussian
        distributions that will be mixed to produce the prior weight
        distribution
    prior_sigma_2: float, default = 0.002
        Prior of the sigma parameter for the second of the two weight Gaussian
        distributions that will be mixed to produce the prior weight
        distribution
    prior_pi: float, default = 0.8
        Scaling factor that will be used to mix the Gaussians to produce the
        prior weight distribution
    posterior_mu_init: float = 0.0,
        The posterior sample of the weights of the Bayesian Embedding layer is
        defined as:
        :math:`\mathbf{w} = \mu + \log(1 + \exp(\rho))`
        where :math:`\mu` and :math:`\rho` are both sampled from Gaussian
        distributions. ``posterior_mu_init`` is the initial mean value for
        the Gaussian distribution from which :math:`\mu` is sampled.
    posterior_rho_init: float = -7.0,
        The initial mean value for the Gaussian distribution from which
        :math:`\rho` is sampled.

    Attributes
    ----------
    bayesian_wide_linear: ``nn.Module``
        the linear layer that comprises the wide branch of the model

    Examples
    --------
    >>> import torch
    >>> from pytorch_widedeep.bayesian_models import BayesianWide
    >>> X = torch.empty(4, 4).random_(6)
    >>> wide = BayesianWide(input_dim=X.unique().size(0), pred_dim=1)
    >>> out = wide(X)
    """

    def __init__(
        self,
        input_dim: int,
        pred_dim: int = 1,
-        prior_sigma_1: float = 0.75,
-        prior_sigma_2: float = 1,
-        prior_pi: float = 0.25,
-        posterior_mu_init: float = 0.1,
-        posterior_rho_init: float = -3.0,
+        prior_sigma_1: float = 1.0,
+        prior_sigma_2: float = 0.002,
+        prior_pi: float = 0.8,
+        posterior_mu_init: float = 0.0,
+        posterior_rho_init: float = -8.0,
    ):
        super(BayesianWide, self).__init__()
-        self.bayesian_wide_linear = BayesianEmbedding(
-            n_embed=input_dim,
+        # Embeddings: val + 1 because 0 is reserved for padding/unseen categories.
+        self.bayesian_wide_linear = bnn.BayesianEmbedding(
+            n_embed=input_dim + 1,
            embed_dim=pred_dim,
            padding_idx=0,
-            use_bias=True,
            prior_sigma_1=prior_sigma_1,
            prior_sigma_2=prior_sigma_2,
            prior_pi=prior_pi,
            posterior_mu_init=posterior_mu_init,
            posterior_rho_init=posterior_rho_init,
        )
+        self.bias = nn.Parameter(torch.zeros(pred_dim))

    def forward(self, X: Tensor) -> Tensor:
-        out = self.bayesian_wide_linear(X.long()).sum(dim=1)
+        out = self.bayesian_wide_linear(X.long()).sum(dim=1) + self.bias
        return out
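The wide component is a linear model implemented as an embedding lookup: every categorical value (and crossed-column value) gets one integer index, its ``pred_dim``-sized embedding acts as that feature's weight, and the forward pass sums the looked-up weights and adds the bias. A usage sketch (not part of the diff; index 0 is reserved for padding/unseen categories, hence ``n_embed = input_dim + 1``):

import torch
from pytorch_widedeep.bayesian_models import BayesianWide

X = torch.empty(4, 4).random_(1, 6)        # feature indices 1..5; 0 is the pad
wide = BayesianWide(input_dim=5, pred_dim=1)
out = wide(X)                              # (4, 4, 1) lookups summed over dim 1
assert out.shape == (4, 1)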
pytorch_widedeep/bayesian_models/tabular/bayesian_mlp/_layers.py

from torch import nn

from pytorch_widedeep.wdtypes import *  # noqa: F403
+from pytorch_widedeep.bayesian_models import bayesian_nn as bnn
from pytorch_widedeep.models._get_activation_fn import get_activation_fn
-from pytorch_widedeep.bayesian_models.bayesian_linear import BayesianLinear


class BayesianMLP(nn.Module):
...

@@ -27,7 +27,7 @@ class BayesianMLP(nn.Module):
        for i in range(1, len(d_hidden)):
            bayesian_dense_layer = nn.Sequential(
                *[
-                    BayesianLinear(
+                    bnn.BayesianLinear(
                        d_hidden[i - 1],
                        d_hidden[i],
                        use_bias,
...
pytorch_widedeep/bayesian_models/tabular/bayesian_mlp/bayesian_tab_mlp.py

@@ -5,15 +5,101 @@ from pytorch_widedeep.models._get_activation_fn import allowed_activations
from pytorch_widedeep.bayesian_models._base_bayesian_model import (
    BaseBayesianModel,
)
-from pytorch_widedeep.bayesian_models.bayesian_embeddings_layers import (
-    BayesianDiffSizeCatAndContEmbeddings,
-)
from pytorch_widedeep.bayesian_models.tabular.bayesian_mlp._layers import (
    BayesianMLP,
)
+from pytorch_widedeep.bayesian_models.tabular.bayesian_embeddings_layers import (
+    BayesianDiffSizeCatAndContEmbeddings,
+)


class BayesianTabMlp(BaseBayesianModel):
    r"""Defines a ``TabMlp`` model that can be used as the ``deeptabular``
    component of a Wide & Deep model.

    This class combines embedding representations of the categorical features
    with numerical (aka continuous) features. These are then passed through a
    series of dense layers (i.e. an MLP).

    Parameters
    ----------
    column_idx: Dict
        Dict containing the index of the columns that will be passed through
        the ``TabMlp`` model. Required to slice the tensors. e.g.
        {'education': 0, 'relationship': 1, 'workclass': 2, ...}
    cat_embed_input: List, Optional, default = None
        List of Tuples with the column name, number of unique values and
        embedding dimension. e.g. [(education, 11, 32), ...]
    cat_embed_dropout: float, default = 0.1
        embeddings dropout
    continuous_cols: List, Optional, default = None
        List with the names of the numeric (aka continuous) columns
    embed_continuous: bool, default = False,
        Boolean indicating if the continuous columns will be embedded
        (i.e. passed each through a linear layer with or without activation)
    cont_embed_dim: int, default = 32,
        Size of the continuous embeddings
    cont_embed_dropout: float, default = 0.1,
        Dropout for the continuous embeddings
    cont_embed_activation: Optional, str, default = None,
        Activation function for the continuous embeddings
    use_cont_bias: bool, default = True,
        Boolean indicating if bias will be used for the continuous embeddings
    cont_norm_layer: str, default = "batchnorm"
        Type of normalization layer applied to the continuous features.
        Options are: 'layernorm', 'batchnorm' or None.
    mlp_hidden_dims: List, default = [200, 100]
        List with the number of neurons per dense layer in the mlp.
    mlp_activation: str, default = "relu"
        Activation function for the dense layers of the MLP. Currently
        ``tanh``, ``relu``, ``leaky_relu`` and ``gelu`` are supported
    prior_sigma_1: float, default = 1.0
        Prior of the sigma parameter for the first of the two weight Gaussian
        distributions that will be mixed to produce the prior weight
        distribution for each Bayesian linear and embedding layer
    prior_sigma_2: float, default = 0.002
        Prior of the sigma parameter for the second of the two weight Gaussian
        distributions that will be mixed to produce the prior weight
        distribution for each Bayesian linear and embedding layer
    prior_pi: float, default = 0.8
        Scaling factor that will be used to mix the Gaussians to produce the
        prior weight distribution for each Bayesian linear and embedding
        layer
    posterior_mu_init: float = 0.0,
        The posterior sample of the weights is defined as:
        :math:`\mathbf{w} = \mu + \log(1 + \exp(\rho))`
        where :math:`\mu` and :math:`\rho` are both sampled from Gaussian
        distributions. ``posterior_mu_init`` is the initial mean value for
        the Gaussian distribution from which :math:`\mu` is sampled for each
        Bayesian linear and embedding layer.
    posterior_rho_init: float = -7.0,
        The initial mean value for the Gaussian distribution from which
        :math:`\rho` is sampled for each Bayesian linear and embedding layer.

    Attributes
    ----------
    bayesian_cat_and_cont_embed: ``nn.Module``
        This is the module that processes the categorical and continuous columns
    bayesian_tab_mlp: ``nn.Sequential``
        mlp model that will receive the concatenation of the embeddings and
        the continuous columns

    Example
    --------
    >>> import torch
    >>> from pytorch_widedeep.bayesian_models import BayesianTabMlp
    >>> X_tab = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
    >>> colnames = ['a', 'b', 'c', 'd', 'e']
    >>> cat_embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4]*4, [8]*4)]
    >>> column_idx = {k: v for v, k in enumerate(colnames)}
    >>> model = BayesianTabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx,
    ...     cat_embed_input=cat_embed_input, continuous_cols=['e'])
    >>> out = model(X_tab)
    """

    def __init__(
        self,
        column_idx: Dict[str, int],
...

@@ -27,12 +113,11 @@ class BayesianTabMlp(BaseBayesianModel):
        use_cont_bias: bool = True,
        cont_norm_layer: str = "batchnorm",
        mlp_hidden_dims: List[int] = [200, 100],
-        mlp_activation: str = "relu",
-        use_bias: bool = True,
+        mlp_activation: str = "leaky_relu",
        prior_sigma_1: float = 0.75,
        prior_sigma_2: float = 0.1,
        prior_pi: float = 0.25,
-        posterior_mu_init: float = 0.1,
+        posterior_mu_init: float = 0.0,
        posterior_rho_init: float = -3.0,
        pred_dim=1,  # Bayesian models will require their own trainer and need the output layer
    ):
...

@@ -52,7 +137,6 @@ class BayesianTabMlp(BaseBayesianModel):
        self.mlp_hidden_dims = mlp_hidden_dims
        self.mlp_activation = mlp_activation
-        self.use_bias = use_bias
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi
...

@@ -73,6 +157,7 @@ class BayesianTabMlp(BaseBayesianModel):
            column_idx,
            cat_embed_input,
            continuous_cols,
+            embed_continuous,
            cont_embed_dim,
            cont_embed_activation,
            use_cont_bias,
...

@@ -89,7 +174,7 @@ class BayesianTabMlp(BaseBayesianModel):
        self.bayesian_tab_mlp = BayesianMLP(
            mlp_hidden_dims,
            mlp_activation,
-            use_bias,
+            True,  # use_bias
            prior_sigma_1,
            prior_sigma_2,
            prior_pi,
...
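The inline comment on ``pred_dim`` marks the main design difference from the non-Bayesian ``TabMlp``: the Bayesian model owns its output layer and is trained by the dedicated ``BayesianTrainer`` rather than being wrapped in a ``WideDeep`` model. A sketch of the pairing, mirroring the example script above on synthetic data in the docstring's format (a sketch, not the canonical usage):

import torch
from pytorch_widedeep.bayesian_models import BayesianTabMlp
from pytorch_widedeep.training.bayesian_trainer import BayesianTrainer

# 4 categorical columns with 4 levels each (8-dim embeddings) plus one
# continuous column 'e'; in practice these come from a fitted TabPreprocessor.
colnames = ["a", "b", "c", "d", "e"]
cat_embed_input = [(c, 4, 8) for c in colnames[:4]]
column_idx = {k: v for v, k in enumerate(colnames)}
X_tab = torch.cat((torch.empty(100, 4).random_(4), torch.rand(100, 1)), axis=1)
target = torch.empty(100).random_(2).numpy()

model = BayesianTabMlp(
    column_idx=column_idx,
    cat_embed_input=cat_embed_input,
    continuous_cols=["e"],
    mlp_hidden_dims=[8, 4],
    pred_dim=1,  # the output layer lives inside the Bayesian model
)
trainer = BayesianTrainer(
    model, objective="binary", optimizer=torch.optim.Adam(model.parameters(), lr=0.01)
)
trainer.fit(X_tab=X_tab.numpy(), target=target, n_epochs=1, batch_size=32)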
pytorch_widedeep/losses.py

@@ -304,13 +304,21 @@ class RMSLELoss(nn.Module):

class BayesianRegressionLoss(nn.Module):
-    def __init__(self, noise_tolerance: float = 0.2):
+    def __init__(self, noise_tolerance: float):
        super().__init__()
        self.noise_tolerance = noise_tolerance

    def forward(self, input: Tensor, target: Tensor) -> Tensor:
        return (
-            torch.distributions.Normal(input, self.noise_tolerance)
+            -torch.distributions.Normal(input, self.noise_tolerance)
            .log_prob(target)
            .sum()
        )


+class BayesianSELoss(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, input: Tensor, target: Tensor) -> Tensor:
+        return (0.5 * (input - target) ** 2).sum()
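A note on the new ``BayesianSELoss`` (commentary, not part of the diff): it is the Gaussian negative log-likelihood with a fixed unit scale, up to additive constants,

$$-\log \mathcal{N}(y;\,\hat{y},\,\sigma) = \frac{(y-\hat{y})^2}{2\sigma^2} + \log\sigma + \tfrac{1}{2}\log 2\pi,$$

so with $\sigma = 1$ and the constants dropped, summing $\tfrac{1}{2}(y-\hat{y})^2$ over the batch matches what ``BayesianRegressionLoss`` computes with ``noise_tolerance`` playing the role of $\sigma$, minus the extra hyperparameter. Both losses sum rather than average, which keeps the likelihood term on the same scale as the KL term of the ELBO.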
pytorch_widedeep/training/_trainer_utils.py

import numpy as np
import torch
from tqdm import tqdm
from torch import nn
from torch.utils.data import TensorDataset
from sklearn.model_selection import train_test_split

from pytorch_widedeep.losses import (
...

@@ -11,7 +13,7 @@ from pytorch_widedeep.losses import (
    RMSLELoss,
    TweedieLoss,
    QuantileLoss,
-    BayesianRegressionLoss,
+    BayesianSELoss,
)
from pytorch_widedeep.wdtypes import Dict, List, Optional, Transforms
from pytorch_widedeep.training._wd_dataset import WideDeepDataset
...

@@ -21,6 +23,80 @@ from pytorch_widedeep.training._loss_and_obj_aliases import (
)


def tabular_train_val_split(
    seed: int,
    method: str,
    X: np.ndarray,
    y: np.ndarray,
    X_val: Optional[np.ndarray] = None,
    y_val: Optional[np.ndarray] = None,
    val_split: Optional[float] = None,
):
    r"""
    Function to create the train/val split for the BayesianTrainer, where only
    tabular data is present

    Parameters
    ----------
    seed: int
        random seed to be used during the train/val split
    method: str
        'regression', 'binary' or 'multiclass'
    X: np.ndarray
        tabular dataset (categorical and continuous features)
    y: np.ndarray
        target array
    X_val: np.ndarray, Optional, default = None
        validation set of tabular features
    y_val: np.ndarray, Optional, default = None
        validation target array

    Returns
    -------
    train_set: ``TensorDataset``
    eval_set: ``TensorDataset``
    """
    if X_val is not None:
        assert (
            y_val is not None
        ), "if X_val is not None the validation target 'y_val' must also be specified"
        train_set = TensorDataset(
            torch.from_numpy(X),
            torch.from_numpy(y),
        )
        eval_set = TensorDataset(
            torch.from_numpy(X_val),
            torch.from_numpy(y_val),
        )
    elif val_split is not None:
        y_tr, y_val, idx_tr, idx_val = train_test_split(
            y,
            np.arange(len(y)),
            test_size=val_split,
            random_state=seed,
            stratify=y if method != "regression" else None,
        )
        X_tr, X_val = X[idx_tr], X[idx_val]
        train_set = TensorDataset(
            torch.from_numpy(X_tr),
            torch.from_numpy(y_tr),
        )
        eval_set = TensorDataset(
            torch.from_numpy(X_val),
            torch.from_numpy(y_val),
        )
    else:
        train_set = TensorDataset(
            torch.from_numpy(X),
            torch.from_numpy(y),
        )
        eval_set = None
    return train_set, eval_set


def wd_train_val_split(  # noqa: C901
    seed: int,
    method: str,
...

@@ -185,6 +261,34 @@ def save_epoch_logs(epoch_logs: Dict, loss: float, score: Dict, stage: str):
    return epoch_logs


def bayesian_alias_to_loss(loss_fn: str, **kwargs):
    r"""
    Function that returns the corresponding loss function given an alias

    Parameters
    ----------
    loss_fn: str
        Loss name

    Returns
    -------
    Object
        loss function

    Examples
    --------
    >>> from pytorch_widedeep.training._trainer_utils import bayesian_alias_to_loss
    >>> loss_fn = bayesian_alias_to_loss(loss_fn="binary", weight=None)
    """
    if loss_fn == "binary":
        return nn.BCEWithLogitsLoss(pos_weight=kwargs["weight"], reduction="sum")
    if loss_fn == "multiclass":
        return nn.CrossEntropyLoss(weight=kwargs["weight"], reduction="sum")
    if loss_fn == "regression":
        return BayesianSELoss()
        # return BayesianRegressionLoss(noise_tolerance=kwargs["noise_tolerance"])


def alias_to_loss(loss_fn: str, **kwargs):  # noqa: C901
    r"""
    Function that returns the corresponding loss function given an alias
...

@@ -232,9 +336,3 @@ def alias_to_loss(loss_fn: str, **kwargs):  # noqa: C901
        return TweedieLoss()
    if "focal_loss" in loss_fn:
        return FocalLoss(**kwargs)
-    if "bayesian_binary" in loss_fn:
-        return nn.BCEWithLogitsLoss(pos_weight=kwargs["weight"], reduction="sum")
-    if "bayesian_multiclass" in loss_fn:
-        return nn.CrossEntropyLoss(weight=kwargs["weight"], reduction="sum")
-    if "bayesian_regression" in loss_fn:
-        return BayesianRegressionLoss(noise_tolerance=kwargs["noise_tolerance"])
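Note that every loss returned by ``bayesian_alias_to_loss`` uses ``reduction="sum"``. That matters for the ELBO: the KL term is a sum over all weights (scaled by ``1 / n_batches``), so the likelihood term must also be summed over the minibatch rather than averaged, or the two terms end up on different scales. A quick check of the relationship (commentary, not part of the diff):

import torch
import torch.nn as nn

logits = torch.randn(8, 1)
y = torch.randint(0, 2, (8, 1)).float()
nll_sum = nn.BCEWithLogitsLoss(reduction="sum")(logits, y)
nll_mean = nn.BCEWithLogitsLoss(reduction="mean")(logits, y)
assert torch.isclose(nll_sum, nll_mean * 8)  # sum = mean * batch size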
pytorch_widedeep/training/bayesian_trainer.py (new file, mode 100644, +601)

(The diff for this new file is collapsed in the page view; its contents are not shown.)