Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Pytorch Widedeep
提交
35b8c03b
P
Pytorch Widedeep
项目概览
Greenplum
/
Pytorch Widedeep
11 个月 前同步成功
通知
9
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Pytorch Widedeep
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
35b8c03b
编写于
9月 25, 2019
作者:
J
jrzaurin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
temporal file for testing purposes
上级
ba65b112
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
90 additions
and
75 deletions
+90
-75
main.py
main.py
+90
-75
未找到文件。
main.py
浏览文件 @
35b8c03b
...
...
@@ -5,89 +5,104 @@ import numpy as np
import
pandas
as
pd
from
pathlib
import
Path
from
torchvision
import
transforms
from
widedeep.models.wide_deep
import
WideDeep
,
WideDeepLoader
from
pytorch_
widedeep.models.wide_deep
import
WideDeep
,
WideDeepLoader
from
sklearn.metrics
import
mean_squared_error
import
pdb
# Temporal file for testing purposes: train and evaluate a WideDeep model on
# the preprocessed Airbnb dataset, report test RMSE, save the weights, and
# finally re-instantiate/compile the model from an alternative dataset object
# as a smoke test of the constructor and default compile path.
if __name__ == '__main__':

    use_cuda = torch.cuda.is_available()

    # NOTE(review): unpickling is unsafe on untrusted input; acceptable here
    # only because the file is produced by this project's own preprocessing.
    # `with` guarantees the handle is closed (the original leaked it).
    with open("data/airbnb/wide_deep_data/wd_dataset.p", "rb") as f:
        wd_dataset = pickle.load(f)

    # Per-component hyper-parameters, keyed by the names WideDeep expects
    # ('wide', 'deep_dense', 'deep_text', 'deep_img').
    params = dict()
    params['wide'] = dict(
        wide_dim=wd_dataset['train']['wide'].shape[1]
    )
    params['deep_dense'] = dict(
        embeddings_input=wd_dataset['cat_embeddings_input'],
        embeddings_encoding_dict=wd_dataset['cat_embeddings_encoding_dict'],
        continuous_cols=wd_dataset['continuous_cols'],
        deep_column_idx=wd_dataset['deep_column_idx'],
        hidden_layers=[64, 32],
        dropout=[0.5]
    )
    params['deep_text'] = dict(
        vocab_size=len(wd_dataset['vocab'].itos),
        embedding_dim=wd_dataset['word_embeddings_matrix'].shape[1],
        hidden_dim=64,
        n_layers=2,
        rnn_dropout=0.5,
        spatial_dropout=0.1,
        padding_idx=1,
        attention=False,
        bidirectional=True,
        embedding_matrix=wd_dataset['word_embeddings_matrix']
    )
    params['deep_img'] = dict(
        pretrained=True,
        freeze='all',
    )

    # Second, attribute-style dataset object used for the smoke test at the
    # bottom of the script.  Loaded with `with` so the handle is closed
    # (the original `pickle.load(open(...))` leaked it).
    with open('data/wd_dataset.p', 'rb') as f:
        wd = pickle.load(f)

    model = WideDeep(output_dim=1, **params)
    # Alternative single-optimizer configuration kept for reference:
    # optimizer={'widedeep': ['Adam', 0.1]}
    # lr_scheduler = {'widedeep': ['MultiStepLR', [3,5,7], 0.1]}
    # Per-component optimizers: [name, lr, ...extra positional args].
    optimizer = dict(
        wide=['Adam', 0.1],
        deep_dense=['Adam', 0.01],
        deep_text=['RMSprop', 0.01, 0.1],
        deep_img=['Adam', 0.01]
    )
    # Per-component LR schedulers: [name, step/milestones, gamma].
    lr_scheduler = dict(
        wide=['StepLR', 3, 0.1],
        deep_dense=['StepLR', 3, 0.1],
        deep_text=['MultiStepLR', [3, 5, 7], 0.1],
        deep_img=['MultiStepLR', [3, 5, 7], 0.1]
    )
    model.compile(method='regression', optimizer=optimizer,
                  lr_scheduler=lr_scheduler)
    if use_cuda:
        model = model.cuda()

    # # ImageNet metrics
    # mean=[0.485, 0.456, 0.406] #RGB
    # std=[0.229, 0.224, 0.225] #RGB
    # cv2 reads BGR, hence the ImageNet stats are channel-reversed.
    mean = [0.406, 0.456, 0.485]  # BGR
    std = [0.225, 0.224, 0.229]   # BGR
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])

    # 'train' mode yields (inputs, target); 'test' mode yields inputs only.
    train_set = WideDeepLoader(wd_dataset['train'], transform, mode='train')
    valid_set = WideDeepLoader(wd_dataset['valid'], transform, mode='train')
    test_set = WideDeepLoader(wd_dataset['test'], transform, mode='test')
    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=64, num_workers=4,
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_set,
                                               batch_size=64, num_workers=4,
                                               shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=32, shuffle=False)

    model.fit(n_epochs=10, train_loader=train_loader,
              eval_loader=valid_loader)
    preds = model.predict(test_loader)
    y = wd_dataset['test']['target']
    # Report test RMSE.
    print(np.sqrt(mean_squared_error(y, preds)))

    # save — mkdir(parents=True, exist_ok=True) replaces the original
    # `if not MODEL_DIR.exists(): os.makedirs(MODEL_DIR)`, which was racy
    # (TOCTOU) and ignored the pathlib import already in scope.
    MODEL_DIR = Path('data/models')
    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), MODEL_DIR / 'widedeep.pkl')

    # Smoke test: rebuild the model from the attribute-style dataset object
    # with flat keyword arguments (no image branch, pretrained=False) and
    # verify that compiling with a plain string optimizer works.
    model = WideDeep(output_dim=1,
                     wide_dim=wd.wide.shape[1],
                     embeddings_input=wd.cat_embeddings_input,
                     embeddings_encoding_dict=wd.cat_embeddings_encoding_dict,
                     continuous_cols=wd.continuous_cols,
                     deep_column_idx=wd.deep_column_idx,
                     vocab_size=len(wd.vocab.itos),
                     pretrained=False)
    model.compile(method='regression', optimizer='Adam')
    print(model.optimizer)
    print(model.lr_scheduler)
    print(model)
# wd_dataset = pickle.load(open("data/airbnb/wide_deep_data/wd_dataset.p", "rb"))
# params = dict()
# params['wide'] = dict(
# wide_dim = wd_dataset['train']['wide'].shape[1]
# )
# params['deep_dense'] = dict(
# embeddings_input = wd_dataset['cat_embeddings_input'],
# embeddings_encoding_dict = wd_dataset['cat_embeddings_encoding_dict'],
# continuous_cols = wd_dataset['continuous_cols'],
# deep_column_idx = wd_dataset['deep_column_idx'],
# hidden_layers = [64,32],
# dropout = [0.5]
# )
# params['deep_text'] = dict(
# vocab_size = len(wd_dataset['vocab'].itos),
# embedding_dim = wd_dataset['word_embeddings_matrix'].shape[1],
# hidden_dim = 64,
# n_layers = 2,
# rnn_dropout = 0.5,
# spatial_dropout = 0.1,
# padding_idx = 1,
# attention = False,
# bidirectional = True,
# embedding_matrix = wd_dataset['word_embeddings_matrix']
# )
# params['deep_img'] = dict(
# pretrained = True,
# freeze='all',
# )
# model = WideDeep(output_dim=1, **params)
# # optimizer={'widedeep': ['Adam', 0.1]}
# # lr_scheduler = {'widedeep': ['MultiStepLR', [3,5,7], 0.1]}
# optimizer=dict(
# wide=['Adam', 0.1],
# deep_dense=['Adam', 0.01],
# deep_text=['RMSprop', 0.01,0.1],
# deep_img= ['Adam', 0.01]
# )
# lr_scheduler=dict(
# wide=['StepLR', 3, 0.1],
# deep_dense=['StepLR', 3, 0.1],
# deep_text=['MultiStepLR', [3,5,7], 0.1],
# deep_img=['MultiStepLR', [3,5,7], 0.1]
# )
# model.compile(method='regression', optimizer=optimizer, lr_scheduler=lr_scheduler)
# if use_cuda:
# model = model.cuda()
# # # ImageNet metrics
# # mean=[0.485, 0.456, 0.406] #RGB
# # std=[0.229, 0.224, 0.225] #RGB
# # cv2 reads BGR
# mean=[0.406, 0.456, 0.485] #BGR
# std=[0.225, 0.224, 0.229] #BGR
# transform = transforms.Compose([
# transforms.ToTensor(),
# transforms.Normalize(mean=mean, std=std)
# ])
# train_set = WideDeepLoader(wd_dataset['train'], transform, mode='train')
# valid_set = WideDeepLoader(wd_dataset['valid'], transform, mode='train')
# test_set = WideDeepLoader(wd_dataset['test'], transform, mode='test')
# train_loader = torch.utils.data.DataLoader(dataset=train_set,
# batch_size=64, num_workers=4, shuffle=True)
# valid_loader = torch.utils.data.DataLoader(dataset=valid_set,
# batch_size=64, num_workers=4, shuffle=True)
# test_loader = torch.utils.data.DataLoader(dataset=test_set,
# batch_size=32,shuffle=False)
# model.fit(n_epochs=10, train_loader=train_loader, eval_loader=valid_loader)
# preds = model.predict(test_loader)
# y = wd_dataset['test']['target']
# print(np.sqrt(mean_squared_error(y, preds)))
# # save
# MODEL_DIR = Path('data/models')
# if not MODEL_DIR.exists(): os.makedirs(MODEL_DIR)
# torch.save(model.state_dict(), MODEL_DIR/'widedeep.pkl')
# load
# model = WideDeep(1, **params)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录