Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
bae118ee
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
bae118ee
编写于
6月 08, 2020
作者:
W
wuzhihua
提交者:
GitHub
6月 08, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #48 from yaoxuefeng6/add_deep_crossing
add deep_crossing
上级
b76b6ad2
eeea5882
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
232 addition
and
0 deletion
+232
-0
models/rank/deep_crossing/__init__.py
models/rank/deep_crossing/__init__.py
+13
-0
models/rank/deep_crossing/config.yaml
models/rank/deep_crossing/config.yaml
+75
-0
models/rank/deep_crossing/model.py
models/rank/deep_crossing/model.py
+144
-0
未找到文件。
models/rank/deep_crossing/__init__.py
0 → 100755
浏览文件 @
bae118ee
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
models/rank/deep_crossing/config.yaml
0 → 100755
浏览文件 @
bae118ee
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# global settings
debug: false
workspace: "paddlerec.models.rank.deep_crossing"

dataset:
  - name: train_sample
    type: QueueDataset
    batch_size: 5
    data_path: "{workspace}/../dataset/Criteo_data/sample_data/train"
    sparse_slots: "label feat_idx"
    dense_slots: "feat_value:39"
  - name: infer_sample
    type: QueueDataset
    batch_size: 5
    data_path: "{workspace}/../dataset/Criteo_data/sample_data/train"
    sparse_slots: "label feat_idx"
    dense_slots: "feat_value:39"

hyper_parameters:
  # user-defined configuration
  optimizer:
    class: SGD
    learning_rate: 0.0001
  sparse_feature_number: 1086460
  sparse_feature_dim: 8
  reg: 0.001
  num_field: 39
  residual_unit_num: 4
  residual_w_dim: 128

mode: train_runner
# For inference, change mode to "infer_runner" and switch the phase below to "infer_phase".

runner:
  - name: train_runner
    trainer_class: single_train
    epochs: 1
    device: cpu
    init_model_path: ""
    save_checkpoint_interval: 1
    save_inference_interval: 1
    save_checkpoint_path: "increment"
    save_inference_path: "inference"
    print_interval: 1
  - name: infer_runner
    trainer_class: single_infer
    epochs: 1
    device: cpu
    init_model_path: "increment/0"
    print_interval: 1

phase:
- name: phase1
  model: "{workspace}/model.py"
  dataset_name: train_sample
  thread_num: 1
#- name: infer_phase
# model: "{workspace}/model.py"
# dataset_name: infer_sample
# thread_num: 1
models/rank/deep_crossing/model.py
0 → 100755
浏览文件 @
bae118ee
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
from
collections
import
OrderedDict
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.model
import
Model
as
ModelBase
class Model(ModelBase):
    """Deep Crossing ranking model expressed as a paddle.fluid network.

    Each field contributes a learned scalar weight plus a dense embedding
    scaled by the field's dense value.  The per-field vectors are flattened,
    passed through ``residual_unit_num`` residual units and finally through
    a single sigmoid unit that produces the prediction.
    """

    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _init_hyper_parameters(self):
        """Read the model hyper-parameters from the global environment.

        Raises:
            ValueError: if ``sparse_feature_number``, ``sparse_feature_dim``
                or ``num_field`` is absent from ``hyper_parameters``; these
                have no usable default and the network cannot be built
                without them.
        """
        self.is_distributed = envs.get_trainer() == "CtrTrainer"
        self.sparse_feature_number = envs.get_global_env(
            "hyper_parameters.sparse_feature_number", None)
        self.sparse_feature_dim = envs.get_global_env(
            "hyper_parameters.sparse_feature_dim", None)
        self.reg = envs.get_global_env("hyper_parameters.reg", 1e-4)
        self.num_field = envs.get_global_env("hyper_parameters.num_field",
                                             None)
        self.residual_unit_num = envs.get_global_env(
            "hyper_parameters.residual_unit_num", 1)
        self.residual_w_dim = envs.get_global_env(
            "hyper_parameters.residual_w_dim", 32)

        # Fail early with a readable message instead of a TypeError from
        # arithmetic on None further down.
        required = ("sparse_feature_number", "sparse_feature_dim",
                    "num_field")
        missing = [name for name in required if getattr(self, name) is None]
        if missing:
            raise ValueError(
                "missing required hyper_parameters: {}".format(
                    ", ".join(missing)))

        # Width of the flattened feature vector: every field contributes its
        # embedding plus one scalar weight.
        self.concat_size = self.num_field * (self.sparse_feature_dim + 1)

    def residual_unit(self, x):
        """Apply one residual unit to ``x`` and return the activated sum.

        ``x`` goes through a ReLU fc layer of width ``residual_w_dim`` and a
        linear fc layer of width ``concat_size``; the result is added back to
        ``x``.  ``x`` therefore has to be ``concat_size`` wide for the
        addition to line up.
        """
        hidden = fluid.layers.fc(
            input=x,
            size=self.residual_w_dim,
            act='relu',
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Normal(
                    scale=1.0 / math.sqrt(self.concat_size))))
        projected = fluid.layers.fc(
            input=hidden,
            size=self.concat_size,
            act=None,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Normal(
                    scale=1.0 / math.sqrt(self.residual_w_dim))))
        # NOTE(review): relu6 with such a large threshold looks like a
        # stand-in for a plain ReLU - confirm before replacing it.
        return fluid.layers.relu6(projected + x, threshold=10000000.0)

    # Original (misspelled) public name, kept so existing callers still work.
    resudual_unit = residual_unit

    def _field_embedding(self, feat_idx, dim, init_scale, regularizer=None):
        """Look up a ``dim``-wide embedding per feature id.

        Returns the lookup reshaped to ``[-1, num_field, dim]``.  The table
        has ``sparse_feature_number + 1`` rows and uses id 0 as padding.
        """
        looked_up = fluid.embedding(
            input=feat_idx,
            is_sparse=True,
            is_distributed=self.is_distributed,
            dtype='float32',
            size=[self.sparse_feature_number + 1, dim],
            padding_idx=0,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormalInitializer(
                    loc=0.0, scale=init_scale),
                regularizer=regularizer))
        return fluid.layers.reshape(
            looked_up, shape=[-1, self.num_field, dim])

    def net(self, inputs, is_infer=False):
        """Build the forward network, the loss and the AUC metrics.

        Args:
            inputs: not used here; the slot variables are taken from
                ``self._sparse_data_var`` and ``self._dense_data_var``.
            is_infer: when true, the AUC variable is additionally stored in
                ``self._infer_results``.

        Sets ``self.label``, ``self.predict``, ``self._cost`` and the "AUC" /
        "BATCH_AUC" entries of ``self._metrics``.
        """
        # presumably the slot order follows sparse_slots / dense_slots in
        # config.yaml (label, feat_idx / feat_value) - verify against config
        feat_idx = self._sparse_data_var[1]  # (batch_size * num_field) * 1
        raw_feat_value = self._dense_data_var[0]  # batch_size * num_field
        self.label = self._sparse_data_var[0]  # batch_size * 1

        init_value_ = 0.1

        # batch_size * num_field * 1
        feat_value = fluid.layers.reshape(raw_feat_value,
                                          [-1, self.num_field, 1])

        # Scalar weight per feature id, L1-regularised.
        # batch_size * num_field * 1
        first_weights = self._field_embedding(
            feat_idx,
            1,
            init_value_,
            regularizer=fluid.regularizer.L1DecayRegularizer(self.reg))

        # Dense embedding per feature id.
        # batch_size * num_field * embedding_size
        feat_embeddings = self._field_embedding(
            feat_idx, self.sparse_feature_dim,
            init_value_ / math.sqrt(float(self.sparse_feature_dim)))
        # Scale every embedding by the field's dense value.
        feat_embeddings = feat_embeddings * feat_value

        # Append the scalar weight to each field's embedding and flatten all
        # fields into one vector of width concat_size.
        concated = fluid.layers.concat(
            [feat_embeddings, first_weights], axis=2)
        concated = fluid.layers.reshape(
            concated, shape=[-1, self.concat_size])

        for _ in range(self.residual_unit_num):
            concated = self.residual_unit(concated)

        self.predict = fluid.layers.fc(
            input=concated,
            size=1,
            act="sigmoid",
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Normal(
                    scale=1.0 / math.sqrt(self.concat_size))))

        cost = fluid.layers.log_loss(
            input=self.predict,
            label=fluid.layers.cast(self.label, "float32"))
        # The loss is the sum (not the mean) of the per-sample log loss.
        total_cost = fluid.layers.reduce_sum(cost)
        self._cost = total_cost

        # The AUC op is given two columns: (1 - predict, predict).
        predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
        label_int = fluid.layers.cast(self.label, 'int64')
        auc_var, batch_auc_var, _ = fluid.layers.auc(
            input=predict_2d, label=label_int, slide_steps=0)
        self._metrics["AUC"] = auc_var
        self._metrics["BATCH_AUC"] = batch_auc_var
        if is_infer:
            self._infer_results["AUC"] = auc_var
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录