Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
f79f4a0e
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f79f4a0e
编写于
6月 04, 2020
作者:
Y
yaoxuefeng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add afm in rank and fix bugs
上级
9a14e43e
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
267 additions
and
3 deletions
+267
-3
core/utils/envs.py
core/utils/envs.py
+1
-0
core/utils/validation.py
core/utils/validation.py
+1
-3
models/rank/afm/__init__.py
models/rank/afm/__init__.py
+13
-0
models/rank/afm/config.yaml
models/rank/afm/config.yaml
+76
-0
models/rank/afm/model.py
models/rank/afm/model.py
+176
-0
未找到文件。
core/utils/envs.py
浏览文件 @
f79f4a0e
...
...
@@ -13,6 +13,7 @@
# limitations under the License.
from
contextlib
import
closing
import
yaml
import
copy
import
os
import
socket
...
...
core/utils/validation.py
浏览文件 @
f79f4a0e
...
...
@@ -120,9 +120,7 @@ def register():
validations
[
"train.engine"
]
=
ValueFormat
(
"str"
,
[
"single"
,
"local_cluster"
,
"cluster"
],
in_value_handler
)
requires
=
[
"train.namespace"
,
"train.device"
,
"train.epochs"
,
"train.engine"
]
requires
=
[
"workspace"
,
"dataset"
,
"mode"
,
"runner"
,
"mode"
]
return
validations
,
requires
...
...
models/rank/afm/__init__.py
0 → 100755
浏览文件 @
f79f4a0e
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
models/rank/afm/config.yaml
0 → 100755
浏览文件 @
f79f4a0e
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# global settings
debug: false
workspace: "paddlerec.models.rank.afm"

# Datasets: both phases read the bundled Criteo sample data; the "label" and
# "feat_idx" columns arrive as sparse slots, the 39 feature values as one
# dense slot.
dataset:
- name: train_sample
  type: QueueDataset
  batch_size: 5
  data_path: "{workspace}/../dataset/Criteo_data/sample_data/train"
  sparse_slots: "label feat_idx"
  dense_slots: "feat_value:39"
- name: infer_sample
  type: QueueDataset
  batch_size: 5
  data_path: "{workspace}/../dataset/Criteo_data/sample_data/train"
  sparse_slots: "label feat_idx"
  dense_slots: "feat_value:39"

# user-defined hyper parameters
hyper_parameters:
  optimizer:
    class: Adam
    learning_rate: 0.0001
  sparse_feature_number: 1086460
  sparse_feature_dim: 16
  is_sparse: False
  reg: 0.001
  num_field: 39
  act: "relu"
  hidden1_attention_size: 16

mode: train_runner
# if infer, change mode to "infer_runner" and change phase to "infer_phase"

runner:
- name: train_runner
  trainer_class: single_train
  epochs: 1
  device: cpu
  init_model_path: ""
  save_checkpoint_interval: 1
  save_inference_interval: 1
  save_checkpoint_path: "increment"
  save_inference_path: "inference"
  print_interval: 1
- name: infer_runner
  trainer_class: single_infer
  epochs: 1
  device: cpu
  init_model_path: "increment/0"
  print_interval: 1

phase:
- name: phase1
  model: "{workspace}/model.py"
  dataset_name: train_sample
  thread_num: 1
#- name: infer_phase
#  model: "{workspace}/model.py"
#  dataset_name: infer_sample
#  thread_num: 1
models/rank/afm/model.py
0 → 100755
浏览文件 @
f79f4a0e
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
from
collections
import
OrderedDict
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.model
import
Model
as
ModelBase
class Model(ModelBase):
    """AFM (Attentional Factorization Machine) ranking model.

    Combines a first-order linear term with all pair-wise feature
    interactions, weights each interaction with a small attention network
    (attention-based pooling), and applies a sigmoid to produce a
    CTR-style probability. Loss is summed log loss; AUC / BATCH_AUC are
    exposed as metrics.
    """

    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _init_hyper_parameters(self):
        """Load model hyper-parameters from the global runtime config."""
        # Distributed embedding layout is only required under CtrTrainer.
        self.is_distributed = envs.get_trainer() == "CtrTrainer"
        self.sparse_feature_number = envs.get_global_env(
            "hyper_parameters.sparse_feature_number", None)
        self.sparse_feature_dim = envs.get_global_env(
            "hyper_parameters.sparse_feature_dim", None)
        self.is_sparse = envs.get_global_env("hyper_parameters.is_sparse",
                                             False)
        self.reg = envs.get_global_env("hyper_parameters.reg", 1e-4)
        self.num_field = envs.get_global_env("hyper_parameters.num_field",
                                             None)
        self.hidden1_attention_size = envs.get_global_env(
            "hyper_parameters.hidden1_attention_size", 16)
        self.attention_act = envs.get_global_env("hyper_parameters.act",
                                                 "relu")

    def net(self, inputs, is_infer=False):
        """Build the AFM network.

        Args:
            inputs: unused; features are read from self._sparse_data_var /
                self._dense_data_var, which the framework populates.
            is_infer: when True, additionally expose AUC via _infer_results.
        """
        raw_feat_idx = self._sparse_data_var[1]  # (batch_size * num_field) * 1
        raw_feat_value = self._dense_data_var[0]  # batch_size * num_field
        self.label = self._sparse_data_var[0]  # batch_size * 1

        init_value_ = 0.1  # scale for all TruncatedNormal initializers

        feat_idx = raw_feat_idx
        feat_value = fluid.layers.reshape(
            raw_feat_value,
            [-1, self.num_field, 1])  # batch_size * num_field * 1

        # ------------------------- first order term --------------------------
        first_weights_re = fluid.embedding(
            input=feat_idx,
            is_sparse=self.is_sparse,
            is_distributed=self.is_distributed,
            dtype='float32',
            size=[self.sparse_feature_number + 1, 1],
            padding_idx=0,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_value_),
                regularizer=fluid.regularizer.L1DecayRegularizer(self.reg)))
        first_weights = fluid.layers.reshape(
            first_weights_re,
            shape=[-1, self.num_field, 1])  # batch_size * num_field * 1
        y_first_order = fluid.layers.reduce_sum(
            (first_weights * feat_value), 1)  # batch_size * 1

        # -------------------- Pair-wise Interaction Layer --------------------
        feat_embeddings_re = fluid.embedding(
            input=feat_idx,
            is_sparse=self.is_sparse,
            is_distributed=self.is_distributed,
            dtype='float32',
            size=[self.sparse_feature_number + 1, self.sparse_feature_dim],
            padding_idx=0,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0,
                    scale=init_value_ /
                    math.sqrt(float(self.sparse_feature_dim)))))
        feat_embeddings = fluid.layers.reshape(
            feat_embeddings_re,
            shape=[-1, self.num_field,
                   self.sparse_feature_dim])  # batch_size * num_field * emb
        feat_embeddings = feat_embeddings * feat_value

        # Element-wise products of every distinct field pair.
        element_wise_product_list = []
        for i in range(self.num_field):
            for j in range(i + 1, self.num_field):
                element_wise_product_list.append(
                    feat_embeddings[:, i, :] * feat_embeddings[:, j, :])
        stack_element_wise_product = fluid.layers.stack(
            element_wise_product_list, axis=0)  # pairs * batch_size * emb
        stack_element_wise_product = fluid.layers.transpose(
            stack_element_wise_product,
            perm=[1, 0, 2])  # batch_size * pairs * emb

        # ----------------------- Attention-based Pooling ----------------------
        # FIX: use floor division. Under Python 3 `/` yields a float, which is
        # not a valid entry in a reshape `shape` list.
        num_interactions = self.num_field * (self.num_field - 1) // 2

        attention_mul = fluid.layers.fc(
            input=fluid.layers.reshape(
                stack_element_wise_product,
                shape=[-1, self.sparse_feature_dim]),
            size=self.hidden1_attention_size,
            act=self.attention_act,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_value_)),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0,
                    scale=init_value_)))  # (batch*pairs) * attention_size
        attention_h = fluid.layers.create_parameter(
            shape=[self.hidden1_attention_size, 1], dtype="float32")
        attention_out = fluid.layers.matmul(
            attention_mul, attention_h)  # (batch*pairs) * 1
        # FIX: normalize attention scores ACROSS the interaction axis. The
        # original applied softmax to a (batch*pairs, 1) tensor, i.e. over a
        # singleton dimension, which always yields 1.0 and disables attention.
        attention_out = fluid.layers.reshape(
            attention_out, shape=[-1, num_interactions])  # batch_size * pairs
        attention_out = fluid.layers.softmax(attention_out)
        attention_out = fluid.layers.reshape(
            attention_out,
            shape=[-1, num_interactions, 1])  # batch_size * pairs * 1
        attention_pooling = fluid.layers.matmul(
            attention_out, stack_element_wise_product,
            transpose_x=True)  # batch_size * 1 * emb
        attention_pooling = fluid.layers.reshape(
            attention_pooling,
            shape=[-1, self.sparse_feature_dim])  # batch_size * emb
        y_AFM = fluid.layers.fc(
            input=attention_pooling,
            size=1,
            act=None,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_value_)),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_value_)))  # batch_size * 1

        # ------------------------------ Predict -------------------------------
        self.predict = fluid.layers.sigmoid(y_first_order + y_AFM)
        cost = fluid.layers.log_loss(
            input=self.predict,
            label=fluid.layers.cast(self.label, "float32"))
        # NOTE(review): this is a SUMMED (not averaged) log loss, matching the
        # original implementation — confirm against the repo's other rank models.
        avg_cost = fluid.layers.reduce_sum(cost)
        self._cost = avg_cost

        predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
        label_int = fluid.layers.cast(self.label, 'int64')
        auc_var, batch_auc_var, _ = fluid.layers.auc(
            input=predict_2d, label=label_int, slide_steps=0)
        self._metrics["AUC"] = auc_var
        self._metrics["BATCH_AUC"] = batch_auc_var
        if is_infer:
            self._infer_results["AUC"] = auc_var
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录