Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Pytorch Widedeep
提交
ec183b63
P
Pytorch Widedeep
项目概览
Greenplum
/
Pytorch Widedeep
10 个月 前同步成功
通知
9
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Pytorch Widedeep
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
ec183b63
编写于
12月 02, 2021
作者:
P
Pavol Mulinka
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fds initial
上级
cb6defbe
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
167 addition
and
0 deletion
+167
-0
pytorch_widedeep/models/fds.py
pytorch_widedeep/models/fds.py
+158
-0
pytorch_widedeep/models/wide_deep.py
pytorch_widedeep/models/wide_deep.py
+9
-0
未找到文件。
pytorch_widedeep/models/fds.py
0 → 100644
浏览文件 @
ec183b63
# all credits go to
# `Yang, Y., Zha, K., Chen, Y. C., Wang, H., & Katabi, D. (2021).
# Delving into Deep Imbalanced Regression. arXiv preprint arXiv:2102.09554.`
# and their `implementation
# <https://github.com/YyzHarry/imbalanced-regression>`
import
numpy
as
np
from
scipy.ndimage
import
gaussian_filter1d
from
scipy.signal.windows
import
triang
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
def calibrate_mean_var(matrix, m1, v1, m2, v2, clip_min=0.1, clip_max=10):
    """Re-calibrate the columns of ``matrix`` from statistics (m1, v1) to (m2, v2).

    Each column is standardised with its source mean/variance and re-scaled to
    the target mean/variance, with the variance ratio clamped to
    ``[clip_min, clip_max]`` to avoid extreme rescaling.

    Parameters
    ----------
    matrix: torch.Tensor
        2-d tensor of features, one column per feature dimension.
    m1, v1: torch.Tensor
        per-column source mean and variance.
    m2, v2: torch.Tensor
        per-column target mean and variance.
    clip_min, clip_max: float
        bounds applied to the variance ratio ``v2 / v1``.

    Returns
    -------
    torch.Tensor
        the calibrated matrix. NOTE: when some (but not all) entries of
        ``v1`` are zero, ``matrix`` is modified in place and returned.
    """
    if torch.sum(v1) < 1e-10:
        # Source variance is numerically all-zero: nothing to calibrate.
        return matrix

    if (v1 == 0.).any():
        # Calibrate only the columns with non-zero source variance;
        # the zero-variance columns are left untouched (in-place update).
        cols = v1 != 0.
        scale = torch.sqrt(torch.clamp(v2[cols] / v1[cols], clip_min, clip_max))
        matrix[:, cols] = (matrix[:, cols] - m1[cols]) * scale + m2[cols]
        return matrix

    scale = torch.sqrt(torch.clamp(v2 / v1, clip_min, clip_max))
    return (matrix - m1) * scale + m2
class FDS(nn.Module):
    """Feature Distribution Smoothing (FDS).

    Tracks running mean/variance of deep features per binned target value
    ("bucket") and, after a warm-up, calibrates each bucket's features
    towards a kernel-smoothed version of last epoch's statistics.

    All credits go to Yang, Y., Zha, K., Chen, Y. C., Wang, H., & Katabi, D.
    (2021). "Delving into Deep Imbalanced Regression", arXiv:2102.09554, and
    their implementation <https://github.com/YyzHarry/imbalanced-regression>.

    Parameters
    ----------
    feature_dim: int
        dimension of the feature vectors whose statistics are tracked
    bucket_num: int, default = 100
        total number of target buckets
    bucket_start: int, default = 3
        first tracked bucket; labels below it are skipped, and the first
        tracked bucket also absorbs all smaller labels
    start_update: int, default = 0
        first epoch at which running statistics are accumulated
    start_smooth: int, default = 1
        first epoch at which `smooth` actually calibrates features
    kernel: str, default = 'gaussian'
        smoothing window type: 'gaussian', 'triang' or 'laplace'
    ks: int, default = 5
        window length; assumed odd so the window is symmetric
    sigma: int, default = 2
        window scale (std for 'gaussian', decay for 'laplace')
    momentum: float, default = 0.9
        EMA momentum for the running statistics; if ``None``, a
        sample-count-weighted running average is used instead
    """

    def __init__(self, feature_dim, bucket_num=100, bucket_start=3,
                 start_update=0, start_smooth=1, kernel='gaussian',
                 ks=5, sigma=2, momentum=0.9):
        super(FDS, self).__init__()
        self.feature_dim = feature_dim
        self.bucket_num = bucket_num
        self.bucket_start = bucket_start
        self.kernel_window = self._get_kernel_window(kernel, ks, sigma)
        self.half_ks = (ks - 1) // 2
        self.momentum = momentum
        self.start_update = start_update
        self.start_smooth = start_smooth

        # One row of statistics per tracked bucket.
        n_buckets = bucket_num - bucket_start
        self.register_buffer('epoch', torch.zeros(1).fill_(start_update))
        self.register_buffer('running_mean', torch.zeros(n_buckets, feature_dim))
        self.register_buffer('running_var', torch.ones(n_buckets, feature_dim))
        self.register_buffer('running_mean_last_epoch', torch.zeros(n_buckets, feature_dim))
        self.register_buffer('running_var_last_epoch', torch.ones(n_buckets, feature_dim))
        self.register_buffer('smoothed_mean_last_epoch', torch.zeros(n_buckets, feature_dim))
        self.register_buffer('smoothed_var_last_epoch', torch.ones(n_buckets, feature_dim))
        self.register_buffer('num_samples_tracked', torch.zeros(n_buckets))

    @staticmethod
    def _get_kernel_window(kernel, ks, sigma):
        """Return the 1-d smoothing window of length ``ks`` (sums to 1) as a tensor."""
        assert kernel in ['gaussian', 'triang', 'laplace']
        half_ks = (ks - 1) // 2
        if kernel == 'gaussian':
            base_kernel = np.array([0.] * half_ks + [1.] + [0.] * half_ks, dtype=np.float32)
            # BUG FIX: the filtered window was computed twice (once for the
            # numerator, once inside sum()); compute it once and normalise.
            smoothed = gaussian_filter1d(base_kernel, sigma=sigma)
            kernel_window = smoothed / sum(smoothed)
        elif kernel == 'triang':
            window = triang(ks)
            kernel_window = window / sum(window)
        else:
            laplace = lambda x: np.exp(-abs(x) / sigma) / (2. * sigma)
            weights = list(map(laplace, np.arange(-half_ks, half_ks + 1)))
            kernel_window = np.asarray(weights) / sum(weights)

        print(f'Using FDS: [{kernel.upper()}] ({ks}/{sigma})')
        window_t = torch.tensor(kernel_window, dtype=torch.float32)
        # BUG FIX: the original called `.cuda()` unconditionally, which raises
        # on CPU-only machines. Move to GPU only when one is available.
        # NOTE(review): `kernel_window` is a plain attribute, not a registered
        # buffer, so it will not follow a later `module.to(device)` — confirm
        # callers keep the module on the device chosen here.
        return window_t.cuda() if torch.cuda.is_available() else window_t

    def _conv_smooth(self, stats):
        """Convolve the bucket axis of a (buckets, feature_dim) tensor with the window.

        Reshapes to (feature_dim, 1, buckets), reflect-pads by half the kernel
        size so the output keeps the same number of buckets, and reshapes back.
        """
        return F.conv1d(
            input=F.pad(stats.unsqueeze(1).permute(2, 1, 0),
                        pad=(self.half_ks, self.half_ks), mode='reflect'),
            weight=self.kernel_window.view(1, 1, -1), padding=0
        ).permute(2, 1, 0).squeeze(1)

    def _update_last_epoch_stats(self):
        """Snapshot the running stats and compute their smoothed versions."""
        # NOTE(review): these assignments alias (do not copy) the running
        # buffers, so later in-place updates of `running_mean`/`running_var`
        # also mutate the "last epoch" tensors. This matches the reference
        # implementation, but a `.clone()` may be intended — confirm upstream.
        self.running_mean_last_epoch = self.running_mean
        self.running_var_last_epoch = self.running_var
        self.smoothed_mean_last_epoch = self._conv_smooth(self.running_mean_last_epoch)
        self.smoothed_var_last_epoch = self._conv_smooth(self.running_var_last_epoch)

    def reset(self):
        """Reset all tracked statistics to their initial values (mean 0, var 1)."""
        self.running_mean.zero_()
        self.running_var.fill_(1)
        self.running_mean_last_epoch.zero_()
        self.running_var_last_epoch.fill_(1)
        self.smoothed_mean_last_epoch.zero_()
        self.smoothed_var_last_epoch.fill_(1)
        self.num_samples_tracked.zero_()

    def update_last_epoch_stats(self, epoch):
        """Advance the internal epoch counter and refresh last-epoch/smoothed stats.

        Only acts when ``epoch`` is exactly one past the stored epoch, so
        repeated calls within the same epoch are no-ops.
        """
        if epoch == self.epoch + 1:
            self.epoch += 1
            self._update_last_epoch_stats()
            print(f"Updated smoothed statistics on Epoch [{epoch}]!")

    def update_running_stats(self, features, labels, epoch):
        """Fold this batch's features into the per-bucket running statistics.

        Parameters
        ----------
        features: torch.Tensor
            (batch, feature_dim) feature matrix.
        labels: torch.Tensor
            (batch,) target values used to assign features to buckets.
        epoch: int
            current epoch; stale epochs (before the stored counter) are ignored.
        """
        if epoch < self.epoch:
            return

        assert self.feature_dim == features.size(1), "Input feature dimension is not aligned!"
        assert features.size(0) == labels.size(0), "Dimensions of features and labels are not aligned!"

        for label in torch.unique(labels):
            if label > self.bucket_num - 1 or label < self.bucket_start:
                continue
            if label == self.bucket_start:
                # The first tracked bucket absorbs all smaller labels.
                curr_feats = features[labels <= label]
            elif label == self.bucket_num - 1:
                # The last bucket absorbs all larger labels.
                curr_feats = features[labels >= label]
            else:
                curr_feats = features[labels == label]
            curr_num_sample = curr_feats.size(0)
            curr_mean = torch.mean(curr_feats, 0)
            # A single sample needs the biased estimator (0), else var is NaN.
            curr_var = torch.var(curr_feats, 0, unbiased=curr_feats.size(0) != 1)

            b = int(label - self.bucket_start)
            self.num_samples_tracked[b] += curr_num_sample
            if self.momentum is not None:
                factor = self.momentum
            else:
                # Sample-count-weighted running average.
                factor = 1 - curr_num_sample / float(self.num_samples_tracked[b])
            # On the very first update epoch, take the batch stats as-is.
            factor = 0 if epoch == self.start_update else factor
            self.running_mean[b] = (1 - factor) * curr_mean + factor * self.running_mean[b]
            self.running_var[b] = (1 - factor) * curr_var + factor * self.running_var[b]

        print(f"Updated running statistics with Epoch [{epoch}] features!")

    def smooth(self, features, labels, epoch):
        """Calibrate ``features`` towards last epoch's smoothed statistics.

        For each bucket present in ``labels``, the matching rows of
        ``features`` are re-calibrated (in place) from the raw last-epoch
        statistics to their kernel-smoothed counterparts. No-op before
        ``start_smooth``.
        """
        if epoch < self.start_smooth:
            return features

        # assumes labels arrive as (batch, 1) — squeeze to (batch,).
        labels = labels.squeeze(1)
        for label in torch.unique(labels):
            if label > self.bucket_num - 1 or label < self.bucket_start:
                continue
            if label == self.bucket_start:
                mask = labels <= label
            elif label == self.bucket_num - 1:
                mask = labels >= label
            else:
                mask = labels == label
            b = int(label - self.bucket_start)
            features[mask] = calibrate_mean_var(
                features[mask],
                self.running_mean_last_epoch[b],
                self.running_var_last_epoch[b],
                self.smoothed_mean_last_epoch[b],
                self.smoothed_var_last_epoch[b])
        return features
\ No newline at end of file
pytorch_widedeep/models/wide_deep.py
浏览文件 @
ec183b63
...
...
@@ -18,6 +18,7 @@ import torch.nn as nn
from
pytorch_widedeep.wdtypes
import
*
# noqa: F403
from
pytorch_widedeep.models.tab_mlp
import
MLP
,
get_activation_fn
from
pytorch_widedeep.models.tabnet.tab_net
import
TabNetPredLayer
from
pytorch_widedeep.models
import
fds
warnings
.
filterwarnings
(
"default"
,
category
=
UserWarning
)
...
...
@@ -94,6 +95,9 @@ class WideDeep(nn.Module):
enforce_positive_activation: str, default = "softplus"
Activation function to enforce positive output from final layer. Use
"softplus" or "relu".
fds: bool, default = False
Whether the feature distribution smoothing (FDS) layer should be applied
before the final prediction layer. Only available for objective='regressor'.
pred_dim: int, default = 1
Size of the final wide and deep output layer containing the
predictions. `1` for regression and binary classification or number
...
...
@@ -141,6 +145,7 @@ class WideDeep(nn.Module):
enforce_positive
:
bool
=
False
,
enforce_positive_activation
:
str
=
"softplus"
,
pred_dim
:
int
=
1
,
fds
:
bool
=
False
,
):
super
(
WideDeep
,
self
).
__init__
()
...
...
@@ -165,6 +170,10 @@ class WideDeep(nn.Module):
self
.
deephead
=
deephead
self
.
enforce_positive
=
enforce_positive
if
fds
:
config
=
dict
(
feature_dim
=
self
.
deeptabular
.
output_dim
,
start_update
=
0
,
start_smooth
=
1
,
kernel
=
'gaussian'
,
ks
=
5
,
sigma
=
2
)
self
.
FDS
=
fds
.
FDS
(
**
config
)
if
self
.
deeptabular
is
not
None
:
self
.
is_tabnet
=
deeptabular
.
__class__
.
__name__
==
"TabNet"
else
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录