Pytorch Widedeep — Commit e2b8ccbe

Authored on Oct 24, 2019 by jrzaurin

added documentation for model components. Arranged modules to expose utils

Parent: 333ffc9c
Showing 16 changed files with 586 additions and 124 deletions (+586 −124)
examples/main_adult.py                              +4   −5
examples/main_airbnb.py                             +5   −10
pytorch_widedeep/callbacks.py                       +4   −2
pytorch_widedeep/models/deep_dense.py               +49  −1
pytorch_widedeep/models/deep_image.py               +54  −17
pytorch_widedeep/models/deep_text.py                +71  −7
pytorch_widedeep/models/wide.py                     +24  −0
pytorch_widedeep/models/wide_deep.py                +360 −67
pytorch_widedeep/preprocessing/__init__.py          +1   −1
pytorch_widedeep/preprocessing/_preprocessors.py    +5   −4
pytorch_widedeep/utils/__init__.py                  +0   −0
pytorch_widedeep/utils/dense_utils.py               +1   −1
pytorch_widedeep/utils/fastai_transforms.py         +1   −1
pytorch_widedeep/utils/image_utils.py               +1   −1
pytorch_widedeep/utils/text_utils.py                +5   −3
pytorch_widedeep/wdtypes.py                         +1   −4
examples/main_adult.py

@@ -31,7 +31,6 @@ if __name__ == '__main__':
         ('occupation', 10), ('native_country', 10)]
     continuous_cols = ["age", "hours_per_week"]
     target = 'income_label'
     target = df[target].values
     prepare_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)
     X_wide = prepare_wide.fit_transform(df)
@@ -42,7 +41,7 @@ if __name__ == '__main__':
         output_dim=1)
-    deepdense = DeepDense(hidden_layers=[64, 32], dropout=[0.5],
+    deepdense = DeepDense(hidden_layers=[64, 32], dropout=[0.2, 0.2],
         deep_column_idx=prepare_deep.deep_column_idx,
         embed_input=prepare_deep.embeddings_input,
         continuous_cols=continuous_cols)
@@ -56,14 +55,14 @@ if __name__ == '__main__':
     optimizers = {'wide': wide_opt, 'deepdense': deep_opt}
     schedulers = {'wide': wide_sch, 'deepdense': deep_sch}
     initializers = {'wide': KaimingNormal, 'deepdense': XavierNormal}
-    callbacks = [LRHistory, EarlyStopping, ModelCheckpoint(filepath='../model_weights/wd_out')]
+    callbacks = [LRHistory(n_epochs=10), EarlyStopping, ModelCheckpoint(filepath='../model_weights/wd_out')]
     metrics = [BinaryAccuracy]
-    model.compile(method='logistic', initializers=initializers, optimizers=optimizers,
-        lr_schedulers=schedulers, callbacks=callbacks, metrics=metrics)
+    model.compile(method='binary', optimizers=optimizers, lr_schedulers=schedulers,
+        initializers=initializers, callbacks=callbacks, metrics=metrics)
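Read together, these hunks change the adult example to compile with method='binary', per-component optimizers/schedulers/initializers, and an instantiated LRHistory callback. A minimal end-to-end sketch under those assumptions follows; the column lists, file locations, the initializers/metrics import paths, and the DeepPreprocessor/fit signatures are illustrative assumptions rather than facts taken from this diff.

# Hedged end-to-end sketch of the adult example after this commit.
import pandas as pd
import torch
from pytorch_widedeep.preprocessing import WidePreprocessor, DeepPreprocessor
from pytorch_widedeep.models import Wide, DeepDense, WideDeep
from pytorch_widedeep.callbacks import LRHistory, EarlyStopping, ModelCheckpoint
from pytorch_widedeep.initializers import KaimingNormal, XavierNormal  # assumed module path
from pytorch_widedeep.metrics import BinaryAccuracy                    # assumed module path

df = pd.read_csv('data/adult/adult.csv')  # assumed location
# Illustrative column choices; only 'occupation', 'native_country', "age",
# "hours_per_week" and 'income_label' appear in the diff itself.
wide_cols = ['education', 'relationship', 'workclass', 'occupation', 'native_country']
crossed_cols = [('education', 'occupation'), ('native_country', 'occupation')]
embed_cols = [('education', 10), ('relationship', 8), ('workclass', 10),
              ('occupation', 10), ('native_country', 10)]
continuous_cols = ["age", "hours_per_week"]
target = df['income_label'].values

# Preprocess the wide and deep inputs
prepare_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)
X_wide = prepare_wide.fit_transform(df)
prepare_deep = DeepPreprocessor(embed_cols=embed_cols, continuous_cols=continuous_cols)  # signature assumed
X_deep = prepare_deep.fit_transform(df)

# Build the two branches and combine them
wide = Wide(wide_dim=X_wide.shape[1], output_dim=1)
deepdense = DeepDense(hidden_layers=[64, 32], dropout=[0.2, 0.2],
                      deep_column_idx=prepare_deep.deep_column_idx,
                      embed_input=prepare_deep.embeddings_input,
                      continuous_cols=continuous_cols)
model = WideDeep(wide=wide, deepdense=deepdense)

# Per-component optimizers, schedulers and initializers, keyed by component name
wide_opt = torch.optim.Adam(model.wide.parameters())
deep_opt = torch.optim.Adam(model.deepdense.parameters())
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)
optimizers = {'wide': wide_opt, 'deepdense': deep_opt}
schedulers = {'wide': wide_sch, 'deepdense': deep_sch}
initializers = {'wide': KaimingNormal, 'deepdense': XavierNormal}
callbacks = [LRHistory(n_epochs=10), EarlyStopping,
             ModelCheckpoint(filepath='../model_weights/wd_out')]
metrics = [BinaryAccuracy]

model.compile(method='binary', optimizers=optimizers, lr_schedulers=schedulers,
              initializers=initializers, callbacks=callbacks, metrics=metrics)
model.fit(X_wide=X_wide, X_deep=X_deep, target=target, n_epochs=10, batch_size=256)  # fit signature assumed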
examples/main_airbnb.py

@@ -49,7 +49,7 @@ if __name__ == '__main__':
         output_dim=1)
-    deepdense = DeepDense(hidden_layers=[64, 32], dropout=[0.5],
+    deepdense = DeepDense(hidden_layers=[64, 32], dropout=[0.2, 0.2],
         deep_column_idx=prepare_deep.deep_column_idx,
         embed_input=prepare_deep.embeddings_input,
         continuous_cols=continuous_cols)
@@ -58,31 +58,26 @@ if __name__ == '__main__':
         hidden_dim=64, n_layers=3, rnn_dropout=0.5, spatial_dropout=0.5, padding_idx=1,
         embedding_matrix=text_processor.embedding_matrix)
     deepimage = DeepImage(pretrained=True, head_layers=None)
-    model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage,
-        head_layers=[256, 128, 64])
-    # pdb.set_trace()
+    model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage)
     wide_opt = torch.optim.Adam(model.wide.parameters())
     deep_opt = torch.optim.Adam(model.deepdense.parameters())
     text_opt = RAdam(model.deeptext.parameters())
     img_opt = RAdam(model.deepimage.parameters())
-    head_opt = torch.optim.Adam(model.head.parameters())
     wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)
     deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)
     text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)
     img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=3)
-    head_sch = torch.optim.lr_scheduler.StepLR(head_opt, step_size=5)
-    optimizers = {'wide': wide_opt, 'deepdense': deep_opt, 'deeptext': text_opt, 'deepimage': img_opt, 'head': head_opt}
-    schedulers = {'wide': wide_sch, 'deepdense': deep_sch, 'deeptext': text_sch, 'deepimage': img_sch, 'head': head_sch}
-    initializers = {'wide': KaimingNormal, 'deepdense': KaimingNormal, 'deeptext': KaimingNormal, 'deepimage': KaimingNormal, 'head': KaimingNormal}
+    optimizers = {'wide': wide_opt, 'deepdense': deep_opt, 'deeptext': text_opt, 'deepimage': img_opt}
+    schedulers = {'wide': wide_sch, 'deepdense': deep_sch, 'deeptext': text_sch, 'deepimage': img_sch}
+    initializers = {'wide': KaimingNormal, 'deepdense': KaimingNormal, 'deeptext': KaimingNormal, 'deepimage': KaimingNormal}
     mean = [0.406, 0.456, 0.485]  # BGR
     std = [0.225, 0.224, 0.229]   # BGR
     transforms = [ToTensor, Normalize(mean=mean, std=std)]
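The mean/std lists at the end of the hunk are the usual ImageNet statistics written in BGR channel order (the package's image utilities read images with cv2, which loads channels as BGR). A small sketch of that relationship, not taken from the repo:

# The example's BGR statistics are the standard ImageNet RGB statistics with the
# channel order reversed.
imagenet_mean_rgb = [0.485, 0.456, 0.406]
imagenet_std_rgb = [0.229, 0.224, 0.225]

mean_bgr = list(reversed(imagenet_mean_rgb))  # [0.406, 0.456, 0.485], as in the example
std_bgr = list(reversed(imagenet_std_rgb))    # [0.225, 0.224, 0.229]
print(mean_bgr, std_bgr)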
pytorch_widedeep/callbacks.py

@@ -132,7 +132,9 @@ class History(Callback):
 class LRHistory(Callback):
+    def __init__(self, n_epochs):
+        super(LRHistory, self).__init__()
+        self.n_epochs = n_epochs
     def on_epoch_begin(self, epoch: int, logs: Optional[Dict] = None):
         if epoch == 0 and self.model.lr_scheduler:
@@ -165,7 +167,7 @@ class LRHistory(Callback):
             ("_").join(['lr', str(group_idx)]), []).append(group['lr'])
     def on_epoch_end(self, epoch: int, logs: Optional[Dict] = None):
-        if self.model.lr_scheduler:
+        if epoch != (self.n_epochs - 1) and self.model.lr_scheduler:
             if self.model.lr_scheduler.__class__.__name__ == 'MultipleLRScheduler':
                 for model_name, opt in self.model.optimizer._optimizers.items():
                     if model_name in self.model.lr_scheduler._schedulers:
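With this change LRHistory must be instantiated with the number of epochs (so it can skip the scheduler bookkeeping on the final epoch), rather than passed as a bare class. A short usage sketch, consistent with the updated main_adult.py example above:

from pytorch_widedeep.callbacks import LRHistory, EarlyStopping, ModelCheckpoint

n_epochs = 10
callbacks = [
    LRHistory(n_epochs=n_epochs),                        # new: requires n_epochs
    EarlyStopping,                                       # other callbacks may still be passed as classes
    ModelCheckpoint(filepath='../model_weights/wd_out'),
]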
pytorch_widedeep/models/deep_dense.py

@@ -22,6 +22,54 @@ def dense_layer(inp:int, out:int, dropout:float, batchnorm=False):
 class DeepDense(nn.Module):
     r"""Dense branch of the deep side of the model. This class combines embedding
     representations of the categorical features with numerical (aka continuous)
     features. These are then passed through a series of dense layers.

     Parameters
     ----------
     deep_column_idx: Dict containing the index of the columns that will be passed
         through the DeepDense model. Required to slice the tensors. e.g.
         {'education': 0, 'relationship': 1, 'workclass': 2, ...}
     hidden_layers: List with the number of neurons per dense layer. e.g.: [64, 32]
     dropout: Optional list with the dropout between the dense layers. e.g.: [0.5, 0.5]
     batchnorm: Optional boolean indicating whether or not to include batch
         normalization in the dense layers
     embeddings_input: Optional list of tuples with the column name, number of unique
         values and embedding dimension. e.g. [(education, 11, 32), ...]
     continuous_cols: Optional list with the names of the numeric (aka continuous)
         columns

     Either embeddings_input or continuous_cols (or both) should be passed to the model.

     Attributes
     ----------
     dense: nn.Sequential model of dense layers that will receive the concatenation
         of the embeddings and the continuous columns
     embed_layers: nn.ModuleDict with the embedding layers
     output_dim: integer containing the output dimension of the model. This is a
         required attribute necessary to build the WideDeep class

     Example
     --------
     >>> import torch
     >>> from pytorch_widedeep.models import DeepDense
     >>> X_deep = torch.cat((torch.empty(5, 4).random_(4), torch.rand(5, 1)), axis=1)
     >>> colnames = ['a', 'b', 'c', 'd', 'e']
     >>> embed_input = [(u, i, j) for u, i, j in zip(colnames[:4], [4]*4, [8]*4)]
     >>> deep_column_idx = {k: v for v, k in enumerate(colnames)}
     >>> model = DeepDense(hidden_layers=[8, 4], deep_column_idx=deep_column_idx, embed_input=embed_input)
     >>> model(X_deep)
     tensor([[ 3.4470e-02, -2.0089e-03,  4.7983e-02,  3.3500e-01],
             [ 1.4329e-02, -1.3800e-03, -3.3617e-04,  4.1046e-01],
             [-3.3546e-04,  3.2413e-02, -4.1198e-03,  4.8717e-01],
             [-6.7882e-04,  7.9103e-03, -1.9960e-03,  4.2134e-01],
             [ 6.7187e-02, -1.2821e-03, -3.0960e-04,  3.6123e-01]],
            grad_fn=<LeakyReluBackward1>)
     """
     def __init__(self, deep_column_idx: Dict[str, int], hidden_layers: List[int],
@@ -50,7 +98,7 @@ class DeepDense(nn.Module):
         # Dense Layers
         input_dim = emb_inp_dim + cont_inp_dim
         hidden_layers = [input_dim] + hidden_layers
-        dropout = [0.] + dropout if dropout is not None else [0.] * (len(hidden_layers) - 1)
+        if not dropout: dropout = [0.] * len(hidden_layers)
         batchnorm = batchnorm if batchnorm is not None else False
         self.dense = nn.Sequential()
         for i in range(1, len(hidden_layers)):
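The second hunk shows how the dense stack is assembled: the concatenated embedding/continuous input dimension is prepended to hidden_layers and consecutive pairs are wired together with the module-level dense_layer helper. The sketch below mirrors that loop with a stand-in dense_layer; its internal Linear/LeakyReLU/Dropout layout is an assumption (LeakyReLU is only suggested by the grad_fn in the docstring example), while the loop and the 'dense_layer_{}' naming come from the diff.

import torch.nn as nn

def dense_layer(inp: int, out: int, dropout: float, batchnorm: bool = False) -> nn.Sequential:
    # Stand-in for the helper in deep_dense.py: Linear -> LeakyReLU (+ optional BatchNorm) -> Dropout
    layers = [nn.Linear(inp, out), nn.LeakyReLU(inplace=True)]
    if batchnorm:
        layers.append(nn.BatchNorm1d(out))
    layers.append(nn.Dropout(dropout))
    return nn.Sequential(*layers)

input_dim, hidden_layers, dropout = 36, [64, 32], [0.2, 0.2]
hidden_layers = [input_dim] + hidden_layers            # [36, 64, 32]
if not dropout:
    dropout = [0.] * len(hidden_layers)
dense = nn.Sequential()
for i in range(1, len(hidden_layers)):
    dense.add_module('dense_layer_{}'.format(i - 1),
                     dense_layer(hidden_layers[i - 1], hidden_layers[i], dropout[i - 1]))
print(dense)  # dense_layer_0: 36 -> 64, dense_layer_1: 64 -> 32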
pytorch_widedeep/models/deep_image.py

@@ -20,6 +20,52 @@ def conv_layer(ni:int, nf:int, ks:int=3, stride:int=1, maxpool:bool=True,
 class DeepImage(nn.Module):
     r"""Standard image classifier/regressor using a pretrained network, freezing some
     of the first layers, or all layers. I use ResNets, which have 9 "components"
     before the last dense layers. The first 4 are: conv->batchnorm->relu->maxpool.
     After that there are 4 additional 'layers' (ResNet blocks), so 4+4=8, comprising
     a series of convolutions, and then the final AdaptiveAvgPool2d (8+1=9). The
     parameter freeze sets the layers to be frozen. For example, freeze=6 will freeze
     all but the last 2 layers and the AdaptiveAvgPool2d layer. If freeze='all' it
     freezes the entire network. In addition, there is the option to add a fully
     connected (FC) set of dense layers (the FC-Head, referred to as 'imagehead') on
     top of the backbone.

     Parameters
     ----------
     pretrained: boolean indicating whether to use a pretrained ResNet network or a
         series of conv layers (see the conv_layer function)
     resnet: int indicating the ResNet architecture. One of 18, 34 or 50
     freeze: int or string indicating the number of layers to freeze. If int, it must
         be less than 8
     head_layers: optional list with the sizes of the stacked dense layers in the
         head, e.g.: [128, 64]
     head_dropout: optional list with the dropout between the dense layers,
         e.g.: [0.5, 0.5]
     head_batchnorm: optional boolean indicating whether or not to include batch
         normalization in the dense layers that form the imagehead

     Attributes
     ----------
     backbone: sequential stack of CNNs comprising the 'backbone' of the network
     imagehead: sequential stack of dense layers comprising the FC-Head (aka imagehead)
     output_dim: integer containing the output dimension of the model. This is a
         required attribute necessary to build the WideDeep class

     Example
     --------
     >>> import torch
     >>> from pytorch_widedeep.models import DeepImage
     >>> X_img = torch.rand((2, 3, 224, 224))
     >>> model = DeepImage(head_layers=[512, 64, 8])
     >>> model(X_img)
     tensor([[ 7.7234e-02,  8.0923e-02,  2.3077e-01, -5.1122e-03, -4.3018e-03,
               3.1193e-01,  3.0780e-01,  6.5098e-01],
             [ 4.6191e-02,  6.7856e-02, -3.0163e-04, -3.7670e-03, -2.1437e-03,
               1.5416e-01,  3.9227e-01,  5.5048e-01]], grad_fn=<LeakyReluBackward1>)
     """
     def __init__(self, pretrained: bool = True,
@@ -29,20 +75,6 @@ class DeepImage(nn.Module):
         head_dropout: Optional[List[float]] = None,
         head_batchnorm: Optional[bool] = False):
         super(DeepImage, self).__init__()
-        """
-        Standard image classifier/regressor using a pretrained network
-        freezing some of the first layers (or all layers).
-        I use Resnets which have 9 "components" before the last dense layers.
-        The first 4 are: conv->batchnorm->relu->maxpool.
-        After that we have 4 additional 'layers' (so 4+4=8) comprised by a
-        series of convolutions and then the final AdaptiveAvgPool2d (8+1=9).
-        The parameter freeze sets the last layer to be frozen. For example,
-        freeze=6 will freeze all but the last 2 Layers and AdaptiveAvgPool2d
-        layer. If freeze='all' it freezes the entire network.
-        """
         self.head_layers = head_layers
@@ -86,9 +118,14 @@ class DeepImage(nn.Module):
         self.output_dim = 512
         if self.head_layers is not None:
-            self.head = nn.Sequential()
+            assert self.head_layers[0] == self.output_dim, (
+                "The output dimension from the backbone ({}) is not consistent with "
+                "the expected input dimension ({}) of the fc-head".format(
+                    self.output_dim, self.head_layers[0]))
+            if not head_dropout: head_dropout = [0.] * len(head_layers)
+            self.imagehead = nn.Sequential()
             for i in range(1, len(head_layers)):
-                self.head.add_module(
+                self.imagehead.add_module(
                     'dense_layer_{}'.format(i - 1),
                     dense_layer(head_layers[i - 1], head_layers[i], head_dropout[i - 1], head_batchnorm))
@@ -98,7 +135,7 @@ class DeepImage(nn.Module):
         x = self.backbone(x)
         x = x.view(x.size(0), -1)
         if self.head_layers is not None:
-            out = self.head(x)
+            out = self.imagehead(x)
             return out
         else:
             return x
\ No newline at end of file
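With the new assert, head_layers must start with the backbone's flattened output dimension (512 in the configuration shown in this hunk). A hedged usage sketch, assuming the default pretrained ResNet backbone:

from pytorch_widedeep.models import DeepImage

ok = DeepImage(pretrained=True, head_layers=[512, 256, 64])   # first head size matches output_dim
try:
    DeepImage(pretrained=True, head_layers=[256, 64])         # 256 != 512 -> AssertionError
except AssertionError as e:
    print(e)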
pytorch_widedeep/models/deep_text.py

@@ -4,22 +4,68 @@ import warnings
 from torch import nn
 from ..wdtypes import *
+from .deep_dense import dense_layer

 class DeepText(nn.Module):
     r"""Standard text classifier/regressor comprising a stack of RNNs (LSTMs). In
     addition, there is the option to add a fully connected (FC) set of dense layers
     (the FC-Head, referred to as 'texthead') on top of the stack of RNNs.

     Parameters
     ----------
     vocab_size: number of words in the vocabulary
     hidden_dim: number of features in the hidden state h of the LSTM
     n_layers: number of recurrent layers
     rnn_dropout: dropout applied to the outputs of each LSTM layer except the last
         layer
     bidirectional: boolean indicating whether the stacked RNNs are bidirectional
     padding_idx: index of the padding token in the padded-tokenised sequences.
         default: 1. I use the fastai tokenizer, where the token index 0 is reserved
         for the unknown word token
     embed_dim: optional integer indicating the dimension of the word embedding matrix
     embedding_matrix: optional array with pretrained word embeddings
     head_layers: optional list with the sizes of the stacked dense layers in the
         head, e.g.: [128, 64]
     head_dropout: optional list with the dropout between the dense layers,
         e.g.: [0.5, 0.5]
     head_batchnorm: optional boolean indicating whether or not to include batch
         normalization in the dense layers that form the texthead

     Attributes
     ----------
     word_embed: Module with the word embedding matrix
     rnn: Module with the stack of LSTMs
     texthead: optional Sequential stack of dense layers
     output_dim: integer containing the output dimension of the model. This is a
         required attribute necessary to build the WideDeep class

     Example
     --------
     >>> import torch
     >>> from pytorch_widedeep.models import DeepText
     >>> X_text = torch.cat((torch.zeros([5,1]), torch.empty(5, 4).random_(1,4)), axis=1)
     >>> model = DeepText(vocab_size=4, hidden_dim=4, n_layers=1, padding_idx=0, embed_dim=4)
     >>> model(X_text)
     tensor([[ 0.0315,  0.0393, -0.0618, -0.0561],
             [-0.0674,  0.0297, -0.1118, -0.0668],
             [-0.0446,  0.0814, -0.0921, -0.0338],
             [-0.0844,  0.0681, -0.1016, -0.0464],
             [-0.0268,  0.0294, -0.0988, -0.0666]], grad_fn=<SelectBackward>)
     """
-    def __init__(self, vocab_size: int, hidden_dim: int = 64, n_layers: int = 3,
-        rnn_dropout: float = 0., padding_idx: int = 1, bidirectional: bool = False,
-        embed_dim: Optional[int] = None,
-        embedding_matrix: Optional[np.ndarray] = None):
+    def __init__(self, vocab_size: int, hidden_dim: int = 64, n_layers: int = 3,
+        rnn_dropout: float = 0., bidirectional: bool = False, padding_idx: int = 1,
+        embed_dim: Optional[int] = None,
+        embedding_matrix: Optional[np.ndarray] = None,
+        head_layers: Optional[List[int]] = None,
+        head_dropout: Optional[List[float]] = None,
+        head_batchnorm: Optional[bool] = False):
         super(DeepText, self).__init__()
-        """
-        Standard Text Classifier/Regressor with a stack of RNNs.
-        """
         if embed_dim is not None and embedding_matrix is not None and not embed_dim == embedding_matrix.shape[1]:
             warnings.warn(
@@ -29,7 +75,7 @@ class DeepText(nn.Module):
                 embedding_matrix.shape[1]), UserWarning)
         self.bidirectional = bidirectional
-        self.word_embed_dropout = nn.Dropout2d(spatial_dropout)
+        self.head_layers = head_layers
         # Pre-trained Embeddings
         if isinstance(embedding_matrix, np.ndarray):
@@ -50,6 +96,20 @@ class DeepText(nn.Module):
         # the output_dim attribute will be used as input_dim when "merging" the models
         self.output_dim = hidden_dim * 2 if bidirectional else hidden_dim
+        if self.head_layers is not None:
+            assert self.head_layers[0] == self.output_dim, (
+                "The output dimension from the stack of RNNs ({}) is not consistent with "
+                "the expected input dimension ({}) of the fc-head".format(
+                    self.output_dim, self.head_layers[0]))
+            if not head_dropout: head_dropout = [0.] * len(head_layers)
+            self.texthead = nn.Sequential()
+            for i in range(1, len(head_layers)):
+                self.texthead.add_module(
+                    'dense_layer_{}'.format(i - 1),
+                    dense_layer(head_layers[i - 1], head_layers[i], head_dropout[i - 1], head_batchnorm))
+            self.output_dim = head_layers[-1]

     def forward(self, X: Tensor) -> Tensor:
         embed = self.word_embed(X.long())
@@ -58,4 +118,8 @@ class DeepText(nn.Module):
             last_h = torch.cat((h[-2], h[-1]), dim=1)
         else:
             last_h = h[-1]
-        return last_h
+        if self.head_layers is not None:
+            out = self.head(last_h)
+            return out
+        else:
+            return last_h
\ No newline at end of file
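Analogously to DeepImage, head_layers[0] must equal the RNN output size (hidden_dim, or hidden_dim*2 when bidirectional), and output_dim then becomes head_layers[-1]. A hedged construction sketch:

from pytorch_widedeep.models import DeepText

model = DeepText(vocab_size=100, hidden_dim=64, n_layers=2, bidirectional=True,
                 padding_idx=0, embed_dim=16,
                 head_layers=[128, 32])   # 128 == hidden_dim * 2 for the bidirectional stack
print(model.output_dim)                   # 32, i.e. head_layers[-1]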
pytorch_widedeep/models/wide.py

@@ -4,6 +4,30 @@ from torch import nn
 from ..wdtypes import *

 class Wide(nn.Module):
     r"""Simple linear layer between the one-hot encoded wide input and the output
     neuron.

     Parameters
     ----------
     wide_dim: size of the input tensor
     output_dim: size of the output tensor

     Attributes
     ----------
     wide_linear: the linear layer that comprises the wide branch of the model

     Example
     --------
     >>> import torch
     >>> from pytorch_widedeep.models import Wide
     >>> X = torch.empty(4, 4).random_(2)
     >>> wide = Wide(wide_dim=X.size(0), output_dim=1)
     >>> wide(X)
     tensor([[-0.8841],
             [-0.8633],
             [-1.2713],
             [-0.4762]], grad_fn=<AddmmBackward>)
     """
     def __init__(self, wide_dim: int, output_dim: int = 1):
         super(Wide, self).__init__()
         self.wide_linear = nn.Linear(wide_dim, output_dim)
pytorch_widedeep/models/wide_deep.py

This diff is collapsed in the original view (+360 −67) and is not shown here.
pytorch_widedeep/preprocessing/__init__.py

 from ._preprocessors import WidePreprocessor
 from ._preprocessors import DeepPreprocessor
 from ._preprocessors import TextPreprocessor
-from ._preprocessors import ImagePreprocessor
+from ._preprocessors import ImagePreprocessor
\ No newline at end of file
pytorch_widedeep/preprocessing/_preprocessors.py

@@ -10,9 +10,9 @@ from scipy.sparse import csc_matrix
 from tqdm import tqdm

 from ..wdtypes import *
-from .utils.dense_utils import *
-from .utils.text_utils import *
-from .utils.image_utils import *
+from ..utils.dense_utils import *
+from ..utils.text_utils import *
+from ..utils.image_utils import *

 class BasePreprocessor(object):
@@ -184,7 +184,8 @@ class TextPreprocessor(BasePreprocessor):
         if self.verbose: print("The vocabulary contains {} words".format(len(self.vocab.stoi)))
         if self.word_vectors_path is not None:
-            self.embedding_matrix = build_embeddings_matrix(self.vocab, self.word_vectors_path)
+            self.embedding_matrix = build_embeddings_matrix(self.vocab, self.word_vectors_path,
+                self.min_freq)
         return padded_seq

     def fit_transform(self, df: pd.DataFrame, text_col: str) -> np.ndarray:
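Because the utils subpackage now lives at the top level of the package (hence the relative imports changing from .utils.* to ..utils.*, and the file moves below), the helper modules can be imported directly from pytorch_widedeep.utils. A sketch; which individual names each module exports is not shown in this diff, so only module-level imports and the two classes named in text_utils are illustrated:

from pytorch_widedeep.utils import dense_utils, text_utils, image_utils, fastai_transforms

# e.g. the tokenizer/vocabulary classes that text_utils relies on live in fastai_transforms
Tokenizer = fastai_transforms.Tokenizer
Vocab = fastai_transforms.Vocab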
pytorch_widedeep/preprocessing/utils/__init__.py → pytorch_widedeep/utils/__init__.py

File moved.

pytorch_widedeep/preprocessing/utils/dense_utils.py → pytorch_widedeep/utils/dense_utils.py

 import numpy as np
 import pandas as pd

-from ...wdtypes import *
+from ..wdtypes import *

 pd.options.mode.chained_assignment = None
pytorch_widedeep/preprocessing/utils/fastai_transforms.py → pytorch_widedeep/utils/fastai_transforms.py

@@ -8,7 +8,7 @@ way I avoid the numerous fastai dependencies.
 Credit for the code here to Jeremy Howard and the fastai team
 '''
-from ...wdtypes import *
+from ..wdtypes import *
 import sys
 import os
pytorch_widedeep/preprocessing/utils/image_utils.py → pytorch_widedeep/utils/image_utils.py

@@ -11,7 +11,7 @@ import numpy as np
 import imutils
 import cv2

-from ...wdtypes import *
+from ..wdtypes import *

 class AspectAwarePreprocessor:
pytorch_widedeep/preprocessing/utils/text_utils.py → pytorch_widedeep/utils/text_utils.py

@@ -4,7 +4,7 @@ import html
 import os
 import re

-from ...wdtypes import *
+from ..wdtypes import *
 from .fastai_transforms import Tokenizer, Vocab
 from gensim.utils import tokenize
@@ -35,7 +35,8 @@ def pad_sequences(seq:List[int], maxlen:int, pad_first:bool=True, pad_idx:int=1)
     return res

-def build_embeddings_matrix(vocab: Vocab, word_vectors_path: str, verbose: int = 1) -> np.ndarray:
+def build_embeddings_matrix(vocab: Vocab, word_vectors_path: str, min_freq: int,
+    verbose: int = 1) -> np.ndarray:
     if not os.path.isfile(word_vectors_path):
         raise FileNotFoundError("{} not found".format(word_vectors_path))
@@ -68,7 +69,8 @@ def build_embeddings_matrix(vocab:Vocab, word_vectors_path:str, verbose:int=1) -
             embedding_matrix[i] = mean_word_vector

     if verbose:
-        print('{} words in the vocabulary had {} vectors and appear more than the min frequency'.format(found_words, word_vectors_path))
+        print('{} words in the vocabulary had {} vectors and appear more than {} times'.format(
+            found_words, word_vectors_path, min_freq))
     return embedding_matrix
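build_embeddings_matrix now takes min_freq as a positional argument before verbose, matching the updated call in _preprocessors.py. A hedged sketch of the new call shape; load_pretrained is a hypothetical wrapper, not part of the library:

from pytorch_widedeep.utils.text_utils import build_embeddings_matrix

def load_pretrained(vocab, word_vectors_path: str, min_freq: int):
    # new signature: (vocab, word_vectors_path, min_freq, verbose=1)
    return build_embeddings_matrix(vocab, word_vectors_path, min_freq, verbose=1)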
pytorch_widedeep/wdtypes.py

 import sys
-import scipy
 from torch.nn import Module
 from torch import Tensor
@@ -10,9 +9,7 @@ from torch.optim.lr_scheduler import _LRScheduler
 from pathlib import PosixPath
 from typing import (List, Any, Union, Dict, Callable, Optional, Tuple,
     Generator, Collection, Iterable)

-sparse_matrix = Union[scipy.sparse.csr.csr_matrix]
+from scipy.sparse.csr import csr_matrix as sparse_matrix
 SimpleNamespace = type(sys.implementation)
 ListRules = Collection[Callable[[str], str]]