Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Pytorch Widedeep
提交
f74d3a2d
P
Pytorch Widedeep
项目概览
Greenplum
/
Pytorch Widedeep
11 个月 前同步成功
通知
9
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Pytorch Widedeep
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
f74d3a2d
编写于
10月 11, 2019
作者:
J
jrzaurin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
temporal files used to check that modules run
上级
f349b6d5
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
80 addition
and
85 deletion
+80
-85
examples/main_adult.py
examples/main_adult.py
+23
-13
examples/main_airbnb.py
examples/main_airbnb.py
+57
-72
未找到文件。
examples/main_adult.py
浏览文件 @
f74d3a2d
...
...
@@ -3,14 +3,11 @@ import pandas as pd
import
torch
from
pathlib
import
Path
# from pytorch_widedeep.utils.data_utils import prepare_data
# from pytorch_widedeep.models.wide_deep import WideDeep
from
pytorch_widedeep.utils.wide_utils
import
WideProcessor
from
pytorch_widedeep.utils.deep_utils
import
DeepProcessor
# from pytorch_widedeep.initializers import Normal, Uniform, XavierNormal, XavierUniform
# from pytorch_widedeep.lr_schedulers import MultipleLRScheduler, StepLR, MultiStepLR, ReduceLROnPlateau
# from pytorch_widedeep.optimizers import Adam, SGD, RAdam
# from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint
# from pytorch_widedeep.metrics import BinaryAccuracy
from
pytorch_widedeep.models.wide
import
Wide
from
pytorch_widedeep.models.deep_dense
import
DeepDense
# use_cuda = torch.cuda.is_available()
...
...
@@ -26,21 +23,34 @@ if __name__ == '__main__':
df
.
drop
(
'income'
,
axis
=
1
,
inplace
=
True
)
df
.
head
()
from
pytorch_widedeep.utils.wide_utils
import
WideProcessor
wide_cols
=
[
'age_buckets'
,
'education'
,
'relationship'
,
'workclass'
,
'occupation'
,
'native_country'
,
'gender'
]
crossed_cols
=
[(
'education'
,
'occupation'
),
(
'native_country'
,
'occupation'
)]
prepare_wide
=
WideProcessor
(
wide_cols
=
wide_cols
,
crossed_cols
=
crossed_cols
)
X_wide
=
prepare_wide
.
fit_transform
(
df
)
from
pytorch_widedeep.utils.deep_utils
import
DeepProcessor
cat_embed_cols
=
[(
'education'
,
10
),
(
'relationship'
,
8
),
(
'workclass'
,
10
),
(
'occupation'
,
10
),(
'native_country'
,
10
)]
continuous_cols
=
[
"age"
,
"hours_per_week"
]
prepare_wide
=
WideProcessor
(
wide_cols
=
wide_cols
,
crossed_cols
=
crossed_cols
)
X_wide
=
prepare_wide
.
fit_transform
(
df
)
prepare_deep
=
DeepProcessor
(
embed_cols
=
cat_embed_cols
,
continuous_cols
=
continuous_cols
)
X_deep
=
prepare_deep
.
fit_transform
(
df
)
wide
=
Wide
(
X_wide
.
shape
[
1
],
1
)
pred_wide
=
wide
(
torch
.
tensor
(
X_wide
[:
10
]))
deep
=
DeepDense
(
hidden_layers
=
[
32
,
16
],
dropout
=
[
0.5
],
deep_column_idx
=
prepare_deep
.
deep_column_idx
,
embed_input
=
prepare_deep
.
embeddings_input
,
continuous_cols
=
continuous_cols
,
batchnorm
=
True
,
output_dim
=
1
)
pred_deep
=
deep
(
torch
.
tensor
(
X_deep
[:
10
]))
pdb
.
set_trace
()
# wd_dataset = prepare_data(df,
# target=target,
# wide_cols=wide_cols,
...
...
examples/main_airbnb.py
浏览文件 @
f74d3a2d
...
...
@@ -4,88 +4,73 @@ import pickle
import
numpy
as
np
import
pandas
as
pd
# from torchvision.transforms import ToTensor, Normalize
# from pytorch_widedeep.initializers import Normal, Uniform, XavierNormal, XavierUniform
# from pytorch_widedeep.lr_schedulers import MultipleLRScheduler, StepLR, MultiStepLR
# from pytorch_widedeep.optimizers import MultipleOptimizers, Adam, SGD, RAdam
# from pytorch_widedeep.callbacks import EarlyStopping, ModelCheckpoint
# from pytorch_widedeep.metrics import BinaryAccuracy
# from pytorch_widedeep.utils.data_utils import prepare_data
# from pytorch_widedeep.models.wide_deep import WideDeep, WideDeepLoader
from
pytorch_widedeep.utils.text_utils
import
TextProcessor
from
pytorch_widedeep.models.deep_text
import
DeepText
import
pdb
from
pytorch_widedeep.utils.image_utils
import
ImageProcessor
from
pytorch_widedeep.models.deep_image
import
DeepImage
from
pytorch_widedeep.utils.base_util
import
DataProcessor
import
pdb
use_cuda
=
torch
.
cuda
.
is_available
()
if
__name__
==
'__main__'
:
filepath
=
'data/wd_dataset_airbnb.p'
# if os.path.isfile(filepath):
# wd = pickle.load(open(filepath, "rb"))
# else:
# df = pd.read_csv('../data/airbnb/tmp_df.csv')
# crossed_cols = (['property_type', 'room_type'],)
# already_dummies = [c for c in df.columns if 'amenity' in c] + ['has_house_rules']
# wide_cols = ['is_location_exact', 'property_type', 'room_type', 'host_gender',
# 'instant_bookable'] + already_dummies
# cat_embed_cols = [(c, 16) for c in df.columns if 'catg' in c] + \
# [('neighbourhood_cleansed', 64), ('cancellation_policy', 16)]
# continuous_cols = ['latitude', 'longitude', 'security_deposit', 'extra_people']
# already_standard = ['latitude', 'longitude']
# text_col = 'description'
# word_vectors_path = 'data/glove.6B/glove.6B.100d.txt'
# img_col = 'id'
# img_path = 'data/airbnb/property_picture'
# target = 'yield'
# wd = prepare_data(df, target, wide_cols, crossed_cols, cat_embed_cols,
# continuous_cols, already_dummies, already_standard, text_col=text_col,
# word_vectors_path=word_vectors_path, img_col=img_col,
# img_path=img_path, filepath='data/wd_dataset_airbnb.p')
# from pytorch_widedeep.utils.text_utils import TextProcessor
# df = pd.read_csv('../data/airbnb/tmp_df.csv')
# text_col = 'description'
# word_vectors_path = '../data/glove.6B/glove.6B.100d.txt'
# text_processor = TextProcessor(word_vectors_path=word_vectors_path)
# X_text = text_processor.fit_transform(df, text_col)
# new_X = text_processor.transform(df.iloc[:10, :], text_col)
# pdb.set_trace()
df
=
pd
.
read_csv
(
'../data/airbnb/tmp_df.csv'
)
from
pytorch_widedeep.utils.image_utils
import
ImageProcessor
crossed_cols
=
([
'property_type'
,
'room_type'
],)
already_dummies
=
[
c
for
c
in
df
.
columns
if
'amenity'
in
c
]
+
[
'has_house_rules'
]
wide_cols
=
[
'is_location_exact'
,
'property_type'
,
'room_type'
,
'host_gender'
,
'instant_bookable'
]
+
already_dummies
cat_embed_cols
=
[(
c
,
16
)
for
c
in
df
.
columns
if
'catg'
in
c
]
+
\
[(
'neighbourhood_cleansed'
,
64
),
(
'cancellation_policy'
,
16
)]
continuous_cols
=
[
'latitude'
,
'longitude'
,
'security_deposit'
,
'extra_people'
]
already_standard
=
[
'latitude'
,
'longitude'
]
text_col
=
'description'
word_vectors_path
=
'../data/glove.6B/glove.6B.100d.txt'
img_col
=
'id'
img_path
=
'../data/airbnb/property_picture'
df
=
pd
.
read_csv
(
'../data/airbnb/tmp_df.csv'
)
target
=
'yield'
text_processor
=
TextProcessor
(
word_vectors_path
=
word_vectors_path
)
X_text
=
text_processor
.
fit_transform
(
df
,
text_col
)
deeptext
=
DeepText
(
vocab_size
=
len
(
text_processor
.
vocab
.
itos
),
hidden_dim
=
64
,
n_layers
=
3
,
rnn_dropout
=
0.5
,
spatial_dropout
=
0.5
,
padding_idx
=
1
,
output_dim
=
1
,
embedding_matrix
=
text_processor
.
embedding_matrix
)
image_processor
=
ImageProcessor
()
X_images
=
image_processor
.
fit_transform
(
df
,
img_col
,
img_path
)
new_X
=
image_processor
.
transform
(
df
.
iloc
[:
10
,:],
img_col
,
img_path
)
pdb
.
set_trace
()
model
=
WideDeep
(
output_dim
=
1
,
wide_dim
=
wd
.
wide
.
shape
[
1
],
cat_embed_input
=
wd
.
cat_embed_input
,
cat_embed_encoding_dict
=
wd
.
cat_embed_encoding_dict
,
continuous_cols
=
wd
.
continuous_cols
,
deep_column_idx
=
wd
.
deep_column_idx
,
add_text
=
True
,
vocab_size
=
len
(
wd
.
vocab
.
itos
),
word_embed_matrix
=
wd
.
word_embed_matrix
,
add_image
=
True
)
initializers
=
{
'wide'
:
Normal
,
'deepdense'
:
Normal
,
'deeptext'
:
Normal
,
'deepimage'
:
Normal
}
optimizers
=
{
'wide'
:
Adam
,
'deepdense'
:
Adam
,
'deeptext'
:
RAdam
,
'deepimage'
:
Adam
}
schedulers
=
{
'wide'
:
StepLR
(
step_size
=
5
),
'deepdense'
:
StepLR
(
step_size
=
5
),
'deeptext'
:
MultiStepLR
(
milestones
=
[
5
,
8
]),
'deepimage'
:
MultiStepLR
(
milestones
=
[
5
,
8
])}
mean
=
[
0.406
,
0.456
,
0.485
]
#BGR
std
=
[
0.225
,
0.224
,
0.229
]
#BGR
transforms
=
[
ToTensor
,
Normalize
(
mean
=
mean
,
std
=
std
)]
callbacks
=
[
EarlyStopping
,
ModelCheckpoint
(
filepath
=
'model_weights/wd_out.pt'
)]
model
.
compile
(
method
=
'regression'
,
initializers
=
initializers
,
optimizers
=
optimizers
,
lr_schedulers
=
schedulers
,
callbacks
=
callbacks
,
transforms
=
transforms
)
model
.
fit
(
X_wide
=
wd
.
wide
,
X_deep
=
wd
.
deepdense
,
X_text
=
wd
.
deeptext
,
X_img
=
wd
.
deepimage
,
target
=
wd
.
target
,
n_epochs
=
1
,
batch_size
=
32
,
val_split
=
0.2
)
\ No newline at end of file
deepimage
=
DeepImage
()
# model = WideDeep(output_dim=1, wide_dim=wd.wide.shape[1],
# cat_embed_input = wd.cat_embed_input,
# cat_embed_encoding_dict=wd.cat_embed_encoding_dict,
# continuous_cols=wd.continuous_cols,
# deep_column_idx=wd.deep_column_idx, add_text=True,
# vocab_size=len(wd.vocab.itos),
# word_embed_matrix = wd.word_embed_matrix,
# add_image=True)
# initializers = {'wide': Normal, 'deepdense':Normal, 'deeptext':Normal, 'deepimage':Normal}
# optimizers = {'wide': Adam, 'deepdense':Adam, 'deeptext':RAdam, 'deepimage':Adam}
# schedulers = {'wide': StepLR(step_size=5), 'deepdense':StepLR(step_size=5), 'deeptext':MultiStepLR(milestones=[5,8]),
# 'deepimage':MultiStepLR(milestones=[5,8])}
# mean = [0.406, 0.456, 0.485] #BGR
# std = [0.225, 0.224, 0.229] #BGR
# transforms = [ToTensor, Normalize(mean=mean, std=std)]
# callbacks = [EarlyStopping, ModelCheckpoint(filepath='model_weights/wd_out.pt')]
# model.compile(method='regression', initializers=initializers, optimizers=optimizers,
# lr_schedulers=schedulers, callbacks=callbacks, transforms=transforms)
# model.fit(X_wide=wd.wide, X_deep=wd.deepdense, X_text=wd.deeptext, X_img=wd.deepimage,
# target=wd.target, n_epochs=1, batch_size=32, val_split=0.2)
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录