Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
38aa1162
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
38aa1162
编写于
5月 29, 2020
作者:
F
frankwhzhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix esmm
上级
00b2de4b
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
83 addition
and
83 deletion
+83
-83
models/multitask/esmm/config.yaml
models/multitask/esmm/config.yaml
+47
-32
models/multitask/esmm/esmm_reader.py
models/multitask/esmm/esmm_reader.py
+0
-3
models/multitask/esmm/model.py
models/multitask/esmm/model.py
+36
-48
未找到文件。
models/multitask/esmm/config.yaml
浏览文件 @
38aa1162
...
@@ -12,40 +12,55 @@
...
@@ -12,40 +12,55 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
evaluate
:
reader
:
batch_size
:
1
class
:
"
{workspace}/esmm_infer_reader.py"
test_data_path
:
"
{workspace}/data/train"
train
:
trainer
:
# for cluster training
strategy
:
"
async"
epochs
:
3
workspace
:
"
paddlerec.models.multitask.esmm"
workspace
:
"
paddlerec.models.multitask.esmm"
device
:
cpu
reader
:
dataset
:
batch_size
:
2
-
name
:
dataset_train
class
:
"
{workspace}/esmm_reader.py"
batch_size
:
1
train_data_path
:
"
{workspace}/data/train"
type
:
QueueDataset
data_path
:
"
{workspace}/data/train"
data_converter
:
"
{workspace}/esmm_reader.py"
-
name
:
dataset_infer
batch_size
:
1
type
:
QueueDataset
data_path
:
"
{workspace}/data/test"
data_converter
:
"
{workspace}/esmm_reader.py"
model
:
hyper_parameters
:
models
:
"
{workspace}/model.py"
hyper_parameters
:
vocab_size
:
10000
vocab_size
:
10000
embed_size
:
128
embed_size
:
128
optimizer
:
class
:
adam
learning_rate
:
0.001
learning_rate
:
0.001
optimizer
:
adam
strategy
:
async
#use infer_runner mode and modify 'phase' below if infer
mode
:
train_runner
#mode: infer_runner
runner
:
-
name
:
train_runner
class
:
single_train
device
:
cpu
epochs
:
3
save_checkpoint_interval
:
2
save_inference_interval
:
4
save_checkpoint_path
:
"
increment"
save_inference_path
:
"
inference"
print_interval
:
10
-
name
:
infer_runner
class
:
single_infer
init_model_path
:
"
increment/0"
device
:
cpu
epochs
:
3
sav
e
:
phas
e
:
increment
:
-
name
:
train
dirname
:
"
increment
"
model
:
"
{workspace}/model.py
"
epoch_interval
:
2
dataset_name
:
dataset_train
save_last
:
True
thread_num
:
1
inference
:
#- name: infer
dirname
:
"
inference
"
# model: "{workspace}/model.py
"
epoch_interval
:
4
# dataset_name: dataset_infer
save_last
:
True
# thread_num: 1
models/multitask/esmm/esmm_reader.py
浏览文件 @
38aa1162
...
@@ -40,8 +40,6 @@ class TrainReader(Reader):
...
@@ -40,8 +40,6 @@ class TrainReader(Reader):
This function needs to be implemented by the user, based on data format
This function needs to be implemented by the user, based on data format
"""
"""
features
=
line
.
strip
().
split
(
','
)
features
=
line
.
strip
().
split
(
','
)
# ctr = list(map(int, features[1]))
# cvr = list(map(int, features[2]))
ctr
=
int
(
features
[
1
])
ctr
=
int
(
features
[
1
])
cvr
=
int
(
features
[
2
])
cvr
=
int
(
features
[
2
])
...
@@ -54,7 +52,6 @@ class TrainReader(Reader):
...
@@ -54,7 +52,6 @@ class TrainReader(Reader):
continue
continue
self
.
all_field_id_dict
[
field_id
][
0
]
=
True
self
.
all_field_id_dict
[
field_id
][
0
]
=
True
index
=
self
.
all_field_id_dict
[
field_id
][
1
]
index
=
self
.
all_field_id_dict
[
field_id
][
1
]
# feat_id = list(map(int, feat_id))
output
[
index
][
1
].
append
(
int
(
feat_id
))
output
[
index
][
1
].
append
(
int
(
feat_id
))
for
field_id
in
self
.
all_field_id_dict
:
for
field_id
in
self
.
all_field_id_dict
:
...
...
models/multitask/esmm/model.py
浏览文件 @
38aa1162
...
@@ -23,28 +23,11 @@ class Model(ModelBase):
...
@@ -23,28 +23,11 @@ class Model(ModelBase):
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
ModelBase
.
__init__
(
self
,
config
)
def
fc
(
self
,
tag
,
data
,
out_dim
,
active
=
'prelu'
):
def
_init_hyper_parameters
(
self
):
self
.
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
)
self
.
embed_size
=
envs
.
get_global_env
(
"hyper_parameters.embed_size"
)
init_stddev
=
1.0
def
input_data
(
self
,
is_infer
=
False
,
**
kwargs
):
scales
=
1.0
/
np
.
sqrt
(
data
.
shape
[
1
])
p_attr
=
fluid
.
param_attr
.
ParamAttr
(
name
=
'%s_weight'
%
tag
,
initializer
=
fluid
.
initializer
.
NormalInitializer
(
loc
=
0.0
,
scale
=
init_stddev
*
scales
))
b_attr
=
fluid
.
ParamAttr
(
name
=
'%s_bias'
%
tag
,
initializer
=
fluid
.
initializer
.
Constant
(
0.1
))
out
=
fluid
.
layers
.
fc
(
input
=
data
,
size
=
out_dim
,
act
=
active
,
param_attr
=
p_attr
,
bias_attr
=
b_attr
,
name
=
tag
)
return
out
def
input_data
(
self
):
sparse_input_ids
=
[
sparse_input_ids
=
[
fluid
.
data
(
fluid
.
data
(
name
=
"field_"
+
str
(
i
),
name
=
"field_"
+
str
(
i
),
...
@@ -55,26 +38,23 @@ class Model(ModelBase):
...
@@ -55,26 +38,23 @@ class Model(ModelBase):
label_ctr
=
fluid
.
data
(
name
=
"ctr"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label_ctr
=
fluid
.
data
(
name
=
"ctr"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label_cvr
=
fluid
.
data
(
name
=
"cvr"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label_cvr
=
fluid
.
data
(
name
=
"cvr"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
inputs
=
sparse_input_ids
+
[
label_ctr
]
+
[
label_cvr
]
inputs
=
sparse_input_ids
+
[
label_ctr
]
+
[
label_cvr
]
self
.
_data_var
.
extend
(
inputs
)
if
is_infer
:
return
inputs
else
:
return
inputs
return
inputs
def
net
(
self
,
inputs
,
is_infer
=
False
):
def
net
(
self
,
inputs
,
is_infer
=
False
):
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
embed_size
=
envs
.
get_global_env
(
"hyper_parameters.embed_size"
,
None
,
self
.
_namespace
)
emb
=
[]
emb
=
[]
for
data
in
inputs
[
0
:
-
2
]:
for
data
in
inputs
[
0
:
-
2
]:
feat_emb
=
fluid
.
embedding
(
feat_emb
=
fluid
.
embedding
(
input
=
data
,
input
=
data
,
size
=
[
vocab_size
,
embed_size
],
size
=
[
self
.
vocab_size
,
self
.
embed_size
],
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
'dis_emb'
,
name
=
'dis_emb'
,
learning_rate
=
5
,
learning_rate
=
5
,
initializer
=
fluid
.
initializer
.
Xavier
(
initializer
=
fluid
.
initializer
.
Xavier
(
fan_in
=
embed_size
,
fan_out
=
embed_size
)),
fan_in
=
self
.
embed_size
,
fan_out
=
self
.
embed_size
)),
is_sparse
=
True
)
is_sparse
=
True
)
field_emb
=
fluid
.
layers
.
sequence_pool
(
field_emb
=
fluid
.
layers
.
sequence_pool
(
input
=
feat_emb
,
pool_type
=
'sum'
)
input
=
feat_emb
,
pool_type
=
'sum'
)
...
@@ -83,14 +63,14 @@ class Model(ModelBase):
...
@@ -83,14 +63,14 @@ class Model(ModelBase):
# ctr
# ctr
active
=
'relu'
active
=
'relu'
ctr_fc1
=
self
.
fc
(
'ctr_fc1'
,
concat_emb
,
200
,
active
)
ctr_fc1
=
self
.
_
fc
(
'ctr_fc1'
,
concat_emb
,
200
,
active
)
ctr_fc2
=
self
.
fc
(
'ctr_fc2'
,
ctr_fc1
,
80
,
active
)
ctr_fc2
=
self
.
_
fc
(
'ctr_fc2'
,
ctr_fc1
,
80
,
active
)
ctr_out
=
self
.
fc
(
'ctr_out'
,
ctr_fc2
,
2
,
'softmax'
)
ctr_out
=
self
.
_
fc
(
'ctr_out'
,
ctr_fc2
,
2
,
'softmax'
)
# cvr
# cvr
cvr_fc1
=
self
.
fc
(
'cvr_fc1'
,
concat_emb
,
200
,
active
)
cvr_fc1
=
self
.
_
fc
(
'cvr_fc1'
,
concat_emb
,
200
,
active
)
cvr_fc2
=
self
.
fc
(
'cvr_fc2'
,
cvr_fc1
,
80
,
active
)
cvr_fc2
=
self
.
_
fc
(
'cvr_fc2'
,
cvr_fc1
,
80
,
active
)
cvr_out
=
self
.
fc
(
'cvr_out'
,
cvr_fc2
,
2
,
'softmax'
)
cvr_out
=
self
.
_
fc
(
'cvr_out'
,
cvr_fc2
,
2
,
'softmax'
)
ctr_clk
=
inputs
[
-
2
]
ctr_clk
=
inputs
[
-
2
]
ctcvr_buy
=
inputs
[
-
1
]
ctcvr_buy
=
inputs
[
-
1
]
...
@@ -127,15 +107,23 @@ class Model(ModelBase):
...
@@ -127,15 +107,23 @@ class Model(ModelBase):
self
.
_metrics
[
"AUC_ctcvr"
]
=
auc_ctcvr
self
.
_metrics
[
"AUC_ctcvr"
]
=
auc_ctcvr
self
.
_metrics
[
"BATCH_AUC_ctcvr"
]
=
batch_auc_ctcvr
self
.
_metrics
[
"BATCH_AUC_ctcvr"
]
=
batch_auc_ctcvr
def
train_net
(
self
):
def
_fc
(
self
,
tag
,
data
,
out_dim
,
active
=
'prelu'
):
input_data
=
self
.
input_data
()
self
.
net
(
input_data
)
init_stddev
=
1.0
scales
=
1.0
/
np
.
sqrt
(
data
.
shape
[
1
])
def
infer_net
(
self
):
self
.
_infer_data_var
=
self
.
input_data
()
p_attr
=
fluid
.
param_attr
.
ParamAttr
(
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
name
=
'%s_weight'
%
tag
,
feed_list
=
self
.
_infer_data_var
,
initializer
=
fluid
.
initializer
.
NormalInitializer
(
capacity
=
64
,
loc
=
0.0
,
scale
=
init_stddev
*
scales
))
use_double_buffer
=
False
,
iterable
=
False
)
b_attr
=
fluid
.
ParamAttr
(
self
.
net
(
self
.
_infer_data_var
,
is_infer
=
True
)
name
=
'%s_bias'
%
tag
,
initializer
=
fluid
.
initializer
.
Constant
(
0.1
))
out
=
fluid
.
layers
.
fc
(
input
=
data
,
size
=
out_dim
,
act
=
active
,
param_attr
=
p_attr
,
bias_attr
=
b_attr
,
name
=
tag
)
return
out
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录