Commit aa5a08e0 in PaddleRec (BaiXuePrincess/PaddleRec, forked from PaddlePaddle/PaddleRec)
Authored Mar 31, 2020 by tangwei
Parent: 154e5da2

add ctr-dnn demo

Showing 6 changed files with 398 additions and 22 deletions (+398, -22):
models/ctr_dnn/hyper_parameters.yaml   +8    -0
models/ctr_dnn/model.py                +98   -18
models/ctr_dnn/reader.py               +67   -4
models/ctr_dnn/sample_test.txt         +100  -0
models/ctr_dnn/sample_train.txt        +100  -0
utils/envs.py                          +25   -0
models/ctr_dnn/hyper_parameters.yaml  (+8, -0)

{
    "sparse_inputs_slots": 27,
    "sparse_feature_number": 1000001,
    "sparse_feature_dim": 8,
    "dense_input_dim": 13,
    "fc_sizes": [400, 400, 40],
    "learning_rate": 0.001
}
(no newline at end of file)
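The new utils/envs.py (shown later in this diff) resolves configuration keys from os.environ, so these hyper-parameters presumably need to be exported into the process environment before the model or reader code runs. The loading step itself is not part of this commit; the following is a minimal sketch, assuming the file above is parsed as JSON (which its syntax permits) and that utils is importable as a package:

import json
import os

from utils import envs  # added later in this commit

# Hypothetical loading step: the config file is valid JSON, so json.load suffices.
with open("models/ctr_dnn/hyper_parameters.yaml") as f:
    config = json.load(f)

# os.environ only stores strings, so stringify values before exporting them.
envs.set_global_envs({k: str(v) for k, v in config.items()}, os.environ)

# get_global_env returns the raw string; callers have to cast it themselves.
dense_input_dim = int(envs.get_global_env("dense_input_dim"))  # 13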
models/ctr_dnn/model.py  (+98, -18)
(This replaces the previous TrainModel and InferModel placeholder classes, whose methods were all pass.)

import math

import paddle.fluid as fluid

from ...utils import envs


class Train(object):
    def __init__(self):
        # Input variables and their names, filled in by input().
        self.sparse_inputs = []
        self.dense_input = None
        self.label_input = None

        self.sparse_input_varnames = []
        self.dense_input_varname = None
        self.label_input_varname = None

    def input(self):
        def sparse_inputs():
            # NOTE: hyper_parameters.yaml defines "sparse_inputs_slots"; this key differs.
            ids = envs.get_global_env("sparse_inputs_counts")

            sparse_input_ids = [
                fluid.layers.data(name="C" + str(i),
                                  shape=[1],
                                  lod_level=1,
                                  dtype="int64") for i in range(ids)
            ]
            return sparse_input_ids, [var.name for var in sparse_input_ids]

        def dense_input():
            dense_input_dim = envs.get_global_env("dense_input_dim")

            dense_input_var = fluid.layers.data(name="dense_input",
                                                shape=[dense_input_dim],
                                                dtype="float32")
            return dense_input_var, dense_input_var.name

        def label_input():
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
            return label, label.name

        self.sparse_inputs, self.sparse_input_varnames = sparse_inputs()
        self.dense_input, self.dense_input_varname = dense_input()
        self.label_input, self.label_input_varname = label_input()

    def net(self):
        def embedding_layer(input):
            sparse_feature_number = envs.get_global_env("sparse_feature_number")
            sparse_feature_dim = envs.get_global_env("sparse_feature_dim")

            emb = fluid.layers.embedding(
                input=input,
                is_sparse=True,
                size=[sparse_feature_number, sparse_feature_dim],
                param_attr=fluid.ParamAttr(
                    name="SparseFeatFactors",
                    initializer=fluid.initializer.Uniform()),
            )
            emb_sum = fluid.layers.sequence_pool(input=emb, pool_type='sum')
            return emb_sum

        def fc(input, output_size):
            output = fluid.layers.fc(
                input=input,
                size=output_size,
                act='relu',
                param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                    scale=1.0 / math.sqrt(input.shape[1]))))
            return output

        sparse_embed_seq = list(map(embedding_layer, self.sparse_inputs))
        concated = fluid.layers.concat(
            sparse_embed_seq + [self.dense_input], axis=1)

        fcs = [concated]
        hidden_layers = envs.get_global_env("fc_sizes")

        for size in hidden_layers:
            fcs.append(fc(fcs[-1], size))

        predict = fluid.layers.fc(
            input=fcs[-1],
            size=2,
            act="softmax",
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
                scale=1 / math.sqrt(fcs[-1].shape[1]))),
        )

        self.predict = predict

    def loss(self, predict):
        cost = fluid.layers.cross_entropy(input=predict, label=self.label_input)
        avg_cost = fluid.layers.reduce_sum(cost)
        self.loss = avg_cost

    def metric(self):
        auc, batch_auc, _ = fluid.layers.auc(input=self.predict,
                                             label=self.label_input,
                                             num_thresholds=2**12,
                                             slide_steps=20)

    def optimizer(self):
        learning_rate = envs.get_global_env("learning_rate")
        optimizer = fluid.optimizer.Adam(learning_rate, lazy_mode=True)
        return optimizer


class Evaluate(object):
    def input(self):
        pass

    def net(self):
        pass
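For orientation, the call order these methods appear designed for (placeholder declaration, network construction, loss, metric, optimizer) would look roughly as follows. None of this wiring is part of the commit, and the configuration plumbing is still minimal (utils/envs.py hands back raw environment strings), so treat it as a sketch of intended use rather than a runnable trainer for this exact revision:

import paddle.fluid as fluid

from models.ctr_dnn.model import Train  # path as laid out in this commit

model = Train()
model.input()                  # declare the sparse id slots, dense vector, and label
model.net()                    # embeddings -> concat -> FC stack -> 2-way softmax (model.predict)
model.loss(model.predict)      # summed cross-entropy; note this rebinds model.loss to the cost var
model.metric()                 # AUC over model.predict vs. the label input
optimizer = model.optimizer()  # Adam with lazy_mode=True
optimizer.minimize(model.loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
# Batches from reader.py's Train() generator would then be fed into
# fluid.default_main_program() with model.loss as the fetch target.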
models/ctr_dnn/reader.py  (+67, -4)
(This replaces the previous TrainReader() and InferReader() placeholders.)

from ...utils import envs

# There are 13 integer features and 26 categorical features
continous_features = range(1, 14)
categorial_features = range(14, 40)
continous_clip = [20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]


class CriteoDataset(object):
    def __init__(self, sparse_feature_dim):
        self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.cont_max_ = [20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
        self.cont_diff_ = [20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
        self.hash_dim_ = sparse_feature_dim
        # here, training data are lines with line_index < train_idx_
        self.train_idx_ = 41256555
        self.continuous_range_ = range(1, 14)
        self.categorical_range_ = range(14, 40)

    def _reader_creator(self, file_list, is_train, trainer_num, trainer_id):
        def reader():
            for file in file_list:
                with open(file, 'r') as f:
                    line_idx = 0
                    for line in f:
                        line_idx += 1
                        features = line.rstrip('\n').split('\t')
                        dense_feature = []
                        sparse_feature = []
                        for idx in self.continuous_range_:
                            if features[idx] == '':
                                dense_feature.append(0.0)
                            else:
                                dense_feature.append(
                                    (float(features[idx]) - self.cont_min_[idx - 1]) /
                                    self.cont_diff_[idx - 1])
                        for idx in self.categorical_range_:
                            sparse_feature.append(
                                [hash(str(idx) + features[idx]) % self.hash_dim_])

                        label = [int(features[0])]
                        yield [dense_feature] + sparse_feature + [label]

        return reader

    def train(self, file_list, trainer_num, trainer_id):
        return self._reader_creator(file_list, True, trainer_num, trainer_id)

    def test(self, file_list):
        return self._reader_creator(file_list, False, 1, 0)


def Train():
    sparse_feature_number = envs.get_global_env("sparse_feature_number")
    train_generator = CriteoDataset(sparse_feature_number)
    return train_generator.train


def Evaluate():
    sparse_feature_number = envs.get_global_env("sparse_feature_number")
    train_generator = CriteoDataset(sparse_feature_number)
    return train_generator.test
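Since Train() and Evaluate() just return bound methods of CriteoDataset, the batch generator is obtained by calling that result with a file list. A minimal sketch of the call pattern, constructing the dataset directly with an integer hash size (to sidestep the string values utils/envs.py would return) and assuming the sample files follow the tab-separated label + 13 integer + 26 categorical layout the parser expects:

from models.ctr_dnn.reader import CriteoDataset  # path as laid out in this commit

# 1000001 mirrors "sparse_feature_number" from hyper_parameters.yaml.
dataset = CriteoDataset(sparse_feature_dim=1000001)
make_batches = dataset.train(["models/ctr_dnn/sample_train.txt"],
                             trainer_num=1, trainer_id=0)

for sample in make_batches():  # _reader_creator returns a generator function
    dense, sparse, label = sample[0], sample[1:-1], sample[-1]
    # dense: 13 floats scaled by (x - cont_min_) / cont_diff_
    # sparse: 26 single-element lists of hashed categorical ids
    # label: [0] or [1] taken from the first field
    break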
models/ctr_dnn/sample_test.txt  (+100, -0)
This diff is collapsed: 100 added lines of sample data.

models/ctr_dnn/sample_train.txt  (+100, -0)
This diff is collapsed: 100 added lines of sample data.
utils/envs.py  (new file, +25, -0)

import os


def encode_value(v):
    return v


def decode_value(v):
    return v


def set_global_envs(yaml, envs):
    for k, v in yaml.items():
        envs[k] = encode_value(v)


def get_global_env(env_name):
    """
    get os environment value
    """
    if env_name not in os.environ:
        raise ValueError("can not find config of {}".format(env_name))
    v = os.environ[env_name]
    return decode_value(v)
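encode_value and decode_value are identity hooks for now, so whatever set_global_envs writes into os.environ must already be a string (os.environ rejects other types), and get_global_env hands that string straight back. A small round-trip sketch, where "not_configured" is a deliberately absent key used only for illustration:

import os

from utils import envs  # this file, assuming utils is importable as a package

envs.set_global_envs({"learning_rate": "0.001"}, os.environ)
assert envs.get_global_env("learning_rate") == "0.001"

# Missing keys raise instead of returning a default:
try:
    envs.get_global_env("not_configured")
except ValueError as err:
    print(err)  # can not find config of not_configured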