Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
OneFlow-Benchmark
提交
3d35bc83
O
OneFlow-Benchmark
项目概览
Oneflow-Inc
/
OneFlow-Benchmark
上一次同步 2 年多
通知
1
Star
92
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
OneFlow-Benchmark
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
3d35bc83
编写于
7月 12, 2020
作者:
S
ShawnXuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rm usless lines
上级
4fda914e
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
51 addition
and
109 deletion
+51
-109
ClickThroughRate/WideDeepLearning/wdl_train_eval.py
ClickThroughRate/WideDeepLearning/wdl_train_eval.py
+23
-50
ClickThroughRate/WideDeepLearning/wdl_train_eval_test.py
ClickThroughRate/WideDeepLearning/wdl_train_eval_test.py
+28
-59
未找到文件。
ClickThroughRate/WideDeepLearning/wdl_train_eval.py
浏览文件 @
3d35bc83
...
...
@@ -38,50 +38,20 @@ DEEP_HIDDEN_UNITS = [FLAGS.hidden_size for i in range(FLAGS.hidden_units_num)]
print
(
DEEP_HIDDEN_UNITS
)
def
_raw_blob_conf
(
name
,
shape
,
data_type
):
return
flow
.
data
.
BlobConf
(
name
=
name
,
shape
=
shape
,
dtype
=
data_type
,
codec
=
flow
.
data
.
RawCodec
())
def
_data_loader
(
data_dir
,
data_part_num
,
batch_size
):
blob_conf
=
[
_raw_blob_conf
(
'labels'
,
(
1
,),
flow
.
int32
),
_raw_blob_conf
(
'dense_fields'
,
(
FLAGS
.
num_dense_fields
,),
flow
.
float
),
_raw_blob_conf
(
'wide_sparse_fields'
,
(
FLAGS
.
num_wide_sparse_fields
,),
flow
.
int32
),
_raw_blob_conf
(
'deep_sparse_fields'
,
(
FLAGS
.
num_deep_sparse_fields
,),
flow
.
int32
)
]
blobs
=
flow
.
data
.
decode_ofrecord
(
data_dir
,
blobs
=
blob_conf
,
batch_size
=
batch_size
,
name
=
"decode"
,
data_part_num
=
data_part_num
,
part_name_suffix_length
=
FLAGS
.
train_part_name_suffix_length
,
)
# copy to gpu
blobs
=
tuple
(
map
(
lambda
blob
:
flow
.
identity
(
blob
),
blobs
))
return
blobs
def
_data_loader_ofrecord_new
(
data_dir
,
data_part_num
,
batch_size
,
shuffle
=
True
):
def
_data_loader_ofrecord
(
data_dir
,
data_part_num
,
batch_size
,
part_name_suffix_length
=-
1
,
shuffle
=
True
):
ofrecord
=
flow
.
data
.
ofrecord_reader
(
data_dir
,
batch_size
=
batch_size
,
data_part_num
=
data_part_num
,
part_name_suffix_length
=
FLAGS
.
train_part_name_suffix_length
,
random_shuffle
=
shuffle
,
shuffle_after_epoch
=
shuffle
)
labels
=
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
"labels"
,
shape
=
(
1
,),
dtype
=
flow
.
int32
)
dense_fields
=
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
"dense_fields"
,
shape
=
(
FLAGS
.
num_dense_fields
,),
dtype
=
flow
.
float
)
wide_sparse_fields
=
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
"wide_sparse_fields"
,
shape
=
(
FLAGS
.
num_wide_sparse_fields
,),
dtype
=
flow
.
int32
)
deep_sparse_fields
=
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
"deep_sparse_fields"
,
shape
=
(
FLAGS
.
num_deep_sparse_fields
,),
dtype
=
flow
.
int32
)
return
flow
.
identity_n
([
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
])
def
_data_loader_onerec
(
data_dir
,
data_part_num
,
batch_size
):
files
=
glob
.
glob
(
os
.
path
.
join
(
data_dir
,
'*.onerec'
))
readdata
=
flow
.
data
.
onerec_reader
(
files
=
files
,
batch_size
=
batch_size
)
labels
=
flow
.
data
.
onerec_decoder
(
readdata
,
key
=
'labels'
,
dtype
=
flow
.
int32
,
shape
=
(
1
,))
dense_fields
=
flow
.
data
.
onerec_decoder
(
readdata
,
key
=
'dense_fields'
,
dtype
=
flow
.
float
,
shape
=
(
FLAGS
.
num_dense_fields
,))
wide_sparse_fields
=
flow
.
data
.
onerec_decoder
(
readdata
,
key
=
'wide_sparse_fields'
,
dtype
=
flow
.
int32
,
shape
=
(
FLAGS
.
num_wide_sparse_fields
,))
deep_sparse_fields
=
flow
.
data
.
onerec_decoder
(
readdata
,
key
=
'deep_sparse_fields'
,
dtype
=
flow
.
int32
,
shape
=
(
FLAGS
.
num_deep_sparse_fields
,))
batch_size
=
batch_size
,
data_part_num
=
data_part_num
,
part_name_suffix_length
=
part_name_suffix_length
,
random_shuffle
=
shuffle
,
shuffle_after_epoch
=
shuffle
)
def
_blob_decoder
(
bn
,
shape
,
dtype
=
flow
.
int32
):
return
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
bn
,
shape
=
shape
,
dtype
=
dtype
)
labels
=
_blob_decoder
(
"labels"
,
(
1
,))
dense_fields
=
_blob_decoder
(
"dense_fields"
,
(
FLAGS
.
num_dense_fields
,),
flow
.
float
)
wide_sparse_fields
=
_blob_decoder
(
"wide_sparse_fields"
,
(
FLAGS
.
num_wide_sparse_fields
,))
deep_sparse_fields
=
_blob_decoder
(
"deep_sparse_fields"
,
(
FLAGS
.
num_deep_sparse_fields
,))
return
flow
.
identity_n
([
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
])
...
...
@@ -174,9 +144,11 @@ def _create_train_callback(step):
@
flow
.
global_function
(
_get_train_conf
())
def
train_job
():
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
=
\
_data_loader_ofrecord_new
(
data_dir
=
FLAGS
.
train_data_dir
,
data_part_num
=
FLAGS
.
train_data_part_num
,
batch_size
=
FLAGS
.
batch_size
)
_data_loader_ofrecord
(
data_dir
=
FLAGS
.
train_data_dir
,
data_part_num
=
FLAGS
.
train_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
part_name_suffix_length
=
FLAGS
.
train_part_name_suffix_length
,
shuffle
=
True
)
logits
=
_model
(
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
)
loss
=
flow
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
labels
,
logits
=
logits
)
flow
.
losses
.
add_loss
(
loss
)
...
...
@@ -186,10 +158,11 @@ def train_job():
@
flow
.
global_function
(
_get_eval_conf
())
def
eval_job
():
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
=
\
_data_loader_ofrecord_new
(
data_dir
=
FLAGS
.
eval_data_dir
,
data_part_num
=
FLAGS
.
eval_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
shuffle
=
False
)
_data_loader_ofrecord
(
data_dir
=
FLAGS
.
eval_data_dir
,
data_part_num
=
FLAGS
.
eval_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
part_name_suffix_length
=
FLAGS
.
eval_part_name_suffix_length
,
shuffle
=
False
)
logits
=
_model
(
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
)
loss
=
flow
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
labels
,
logits
=
logits
)
predict
=
flow
.
math
.
sigmoid
(
logits
)
...
...
ClickThroughRate/WideDeepLearning/wdl_train_eval_test.py
浏览文件 @
3d35bc83
...
...
@@ -45,51 +45,20 @@ eval_epoch_size = FLAGS.eval_data_num // FLAGS.batch_size + 1
test_epoch_size
=
FLAGS
.
test_data_num
//
FLAGS
.
batch_size
+
1
def
_raw_blob_conf
(
name
,
shape
,
data_type
):
return
flow
.
data
.
BlobConf
(
name
=
name
,
shape
=
shape
,
dtype
=
data_type
,
codec
=
flow
.
data
.
RawCodec
())
def
_data_loader
(
data_dir
,
data_part_num
,
batch_size
):
blob_conf
=
[
_raw_blob_conf
(
'labels'
,
(
1
,),
flow
.
int32
),
_raw_blob_conf
(
'dense_fields'
,
(
FLAGS
.
num_dense_fields
,),
flow
.
float
),
_raw_blob_conf
(
'wide_sparse_fields'
,
(
FLAGS
.
num_wide_sparse_fields
,),
flow
.
int32
),
_raw_blob_conf
(
'deep_sparse_fields'
,
(
FLAGS
.
num_deep_sparse_fields
,),
flow
.
int32
)
]
blobs
=
flow
.
data
.
decode_ofrecord
(
data_dir
,
blobs
=
blob_conf
,
batch_size
=
batch_size
,
name
=
"decode"
,
data_part_num
=
data_part_num
,
part_name_suffix_length
=
FLAGS
.
train_part_name_suffix_length
,
)
# copy to gpu
blobs
=
tuple
(
map
(
lambda
blob
:
flow
.
identity
(
blob
),
blobs
))
return
blobs
def
_data_loader_ofrecord_new
(
data_dir
,
data_part_num
,
batch_size
,
part_name_suffix_length
=-
1
,
shuffle
=
True
):
def
_data_loader_ofrecord
(
data_dir
,
data_part_num
,
batch_size
,
part_name_suffix_length
=-
1
,
shuffle
=
True
):
ofrecord
=
flow
.
data
.
ofrecord_reader
(
data_dir
,
batch_size
=
batch_size
,
data_part_num
=
data_part_num
,
part_name_suffix_length
=
part_name_suffix_length
,
random_shuffle
=
shuffle
,
shuffle_after_epoch
=
shuffle
)
labels
=
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
"labels"
,
shape
=
(
1
,),
dtype
=
flow
.
int32
)
dense_fields
=
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
"dense_fields"
,
shape
=
(
FLAGS
.
num_dense_fields
,),
dtype
=
flow
.
float
)
wide_sparse_fields
=
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
"wide_sparse_fields"
,
shape
=
(
FLAGS
.
num_wide_sparse_fields
,),
dtype
=
flow
.
int32
)
deep_sparse_fields
=
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
"deep_sparse_fields"
,
shape
=
(
FLAGS
.
num_deep_sparse_fields
,),
dtype
=
flow
.
int32
)
return
flow
.
identity_n
([
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
])
def
_data_loader_onerec
(
data_dir
,
data_part_num
,
batch_size
):
files
=
glob
.
glob
(
os
.
path
.
join
(
data_dir
,
'*.onerec'
))
readdata
=
flow
.
data
.
onerec_reader
(
files
=
files
,
batch_size
=
batch_size
)
labels
=
flow
.
data
.
onerec_decoder
(
readdata
,
key
=
'labels'
,
dtype
=
flow
.
int32
,
shape
=
(
1
,))
dense_fields
=
flow
.
data
.
onerec_decoder
(
readdata
,
key
=
'dense_fields'
,
dtype
=
flow
.
float
,
shape
=
(
FLAGS
.
num_dense_fields
,))
wide_sparse_fields
=
flow
.
data
.
onerec_decoder
(
readdata
,
key
=
'wide_sparse_fields'
,
dtype
=
flow
.
int32
,
shape
=
(
FLAGS
.
num_wide_sparse_fields
,))
deep_sparse_fields
=
flow
.
data
.
onerec_decoder
(
readdata
,
key
=
'deep_sparse_fields'
,
dtype
=
flow
.
int32
,
shape
=
(
FLAGS
.
num_deep_sparse_fields
,))
batch_size
=
batch_size
,
data_part_num
=
data_part_num
,
part_name_suffix_length
=
part_name_suffix_length
,
random_shuffle
=
shuffle
,
shuffle_after_epoch
=
shuffle
)
def
_blob_decoder
(
bn
,
shape
,
dtype
=
flow
.
int32
):
return
flow
.
data
.
OFRecordRawDecoder
(
ofrecord
,
bn
,
shape
=
shape
,
dtype
=
dtype
)
labels
=
_blob_decoder
(
"labels"
,
(
1
,))
dense_fields
=
_blob_decoder
(
"dense_fields"
,
(
FLAGS
.
num_dense_fields
,),
flow
.
float
)
wide_sparse_fields
=
_blob_decoder
(
"wide_sparse_fields"
,
(
FLAGS
.
num_wide_sparse_fields
,))
deep_sparse_fields
=
_blob_decoder
(
"deep_sparse_fields"
,
(
FLAGS
.
num_deep_sparse_fields
,))
return
flow
.
identity_n
([
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
])
...
...
@@ -183,11 +152,11 @@ def _create_train_callback(epoch, step):
@
flow
.
global_function
(
_get_train_conf
())
def
train_job
():
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
=
\
_data_loader_ofrecord
_new
(
data_dir
=
FLAGS
.
train_data_dir
,
data_part_num
=
FLAGS
.
train_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
part_name_suffix_length
=
FLAGS
.
train_part_name_suffix_length
,
shuffle
=
True
)
_data_loader_ofrecord
(
data_dir
=
FLAGS
.
train_data_dir
,
data_part_num
=
FLAGS
.
train_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
part_name_suffix_length
=
FLAGS
.
train_part_name_suffix_length
,
shuffle
=
True
)
logits
=
_model
(
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
)
loss
=
flow
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
labels
,
logits
=
logits
)
flow
.
losses
.
add_loss
(
loss
)
...
...
@@ -197,11 +166,11 @@ def train_job():
@
flow
.
global_function
(
_get_eval_conf
())
def
eval_job
():
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
=
\
_data_loader_ofrecord
_new
(
data_dir
=
FLAGS
.
eval_data_dir
,
data_part_num
=
FLAGS
.
eval_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
part_name_suffix_length
=
FLAGS
.
eval_part_name_suffix_length
,
shuffle
=
False
)
_data_loader_ofrecord
(
data_dir
=
FLAGS
.
eval_data_dir
,
data_part_num
=
FLAGS
.
eval_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
part_name_suffix_length
=
FLAGS
.
eval_part_name_suffix_length
,
shuffle
=
False
)
logits
=
_model
(
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
)
loss
=
flow
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
labels
,
logits
=
logits
)
predict
=
flow
.
math
.
sigmoid
(
logits
)
...
...
@@ -210,11 +179,11 @@ def eval_job():
@
flow
.
global_function
(
_get_eval_conf
())
def
test_job
():
labels
,
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
=
\
_data_loader_ofrecord
_new
(
data_dir
=
FLAGS
.
test_data_dir
,
data_part_num
=
FLAGS
.
test_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
part_name_suffix_length
=
FLAGS
.
test_part_name_suffix_length
,
shuffle
=
False
)
_data_loader_ofrecord
(
data_dir
=
FLAGS
.
test_data_dir
,
data_part_num
=
FLAGS
.
test_data_part_num
,
batch_size
=
FLAGS
.
batch_size
,
part_name_suffix_length
=
FLAGS
.
test_part_name_suffix_length
,
shuffle
=
False
)
logits
=
_model
(
dense_fields
,
wide_sparse_fields
,
deep_sparse_fields
)
loss
=
flow
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
labels
,
logits
=
logits
)
predict
=
flow
.
math
.
sigmoid
(
logits
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录