Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
f49acc00
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f49acc00
编写于
5月 22, 2020
作者:
F
frankwhzhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix listwise model style and readme
上级
44b8928d
变更
5
显示空白变更内容
内联
并排
Showing
5 changed files
with
103 additions
and
105 deletions
+103
-105
README.md
README.md
+1
-1
models/rerank/listwise/model.py
models/rerank/listwise/model.py
+93
-97
models/rerank/listwise/random_infer_reader.py
models/rerank/listwise/random_infer_reader.py
+3
-2
models/rerank/listwise/random_reader.py
models/rerank/listwise/random_reader.py
+3
-2
models/rerank/readme.md
models/rerank/readme.md
+3
-3
未找到文件。
README.md
浏览文件 @
f49acc00
...
...
@@ -177,7 +177,7 @@ python -m paddlerec.run -m ./models/rank/dnn/config.yaml -b backend.yaml
| 多任务 |
[
ESMM
](
models/multitask/esmm/model.py
)
| ✓ | ✓ | ✓ |
| 多任务 |
[
MMOE
](
models/multitask/mmoe/model.py
)
| ✓ | ✓ | ✓ |
| 多任务 |
[
ShareBottom
](
models/multitask/share-bottom/model.py
)
| ✓ | ✓ | ✓ |
|
融合
|
[
Listwise
](
models/rerank/listwise/model.py
)
| ✓ | x | ✓ |
|
重排序
|
[
Listwise
](
models/rerank/listwise/model.py
)
| ✓ | x | ✓ |
...
...
models/rerank/listwise/model.py
浏览文件 @
f49acc00
...
...
@@ -13,20 +13,28 @@
# limitations under the License.
import
math
import
numpy
as
np
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.model
import
Model
as
ModelBase
import
numpy
as
np
class
Model
(
ModelBase
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
self
.
item_len
=
envs
.
get_global_env
(
"hyper_parameters.self.item_len"
,
None
,
self
.
_namespace
)
self
.
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
,
None
,
self
.
_namespace
)
self
.
user_vocab
=
envs
.
get_global_env
(
"hyper_parameters.user_vocab"
,
None
,
self
.
_namespace
)
self
.
item_vocab
=
envs
.
get_global_env
(
"hyper_parameters.item_vocab"
,
None
,
self
.
_namespace
)
self
.
embed_size
=
envs
.
get_global_env
(
"hyper_parameters.embed_size"
,
None
,
self
.
_namespace
)
def
input_data
(
self
,
is_infer
=
False
):
item_len
=
envs
.
get_global_env
(
"hyper_parameters.item_len"
,
None
,
self
.
_namespace
)
user_slot_names
=
fluid
.
data
(
name
=
'user_slot_names'
,
shape
=
[
None
,
1
],
...
...
@@ -34,14 +42,18 @@ class Model(ModelBase):
lod_level
=
1
)
item_slot_names
=
fluid
.
data
(
name
=
'item_slot_names'
,
shape
=
[
None
,
item_len
],
shape
=
[
None
,
self
.
item_len
],
dtype
=
'int64'
,
lod_level
=
1
)
lens
=
fluid
.
data
(
name
=
'lens'
,
shape
=
[
None
],
dtype
=
'int64'
)
labels
=
fluid
.
data
(
name
=
'labels'
,
shape
=
[
None
,
item_len
],
dtype
=
'int64'
,
lod_level
=
1
)
name
=
'labels'
,
shape
=
[
None
,
self
.
item_len
],
dtype
=
'int64'
,
lod_level
=
1
)
inputs
=
[
user_slot_names
]
+
[
item_slot_names
]
+
[
lens
]
+
[
labels
]
if
is_infer
:
self
.
_infer_data_var
=
inputs
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
...
...
@@ -59,53 +71,7 @@ class Model(ModelBase):
return
inputs
def
default_normal_initializer
(
self
,
nf
=
128
):
return
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
np
.
sqrt
(
1.0
/
nf
))
def
default_regularizer
(
self
):
return
None
def
default_fc
(
self
,
data
,
size
,
num_flatten_dims
=
1
,
act
=
None
,
name
=
None
):
return
fluid
.
layers
.
fc
(
input
=
data
,
size
=
size
,
num_flatten_dims
=
num_flatten_dims
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
self
.
default_normal_initializer
(
size
),
regularizer
=
self
.
default_regularizer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.0
),
regularizer
=
self
.
default_regularizer
()),
act
=
act
,
name
=
name
)
def
default_embedding
(
self
,
data
,
vocab_size
,
embed_size
):
reg
=
fluid
.
regularizer
.
L2Decay
(
1e-5
)
# IMPORTANT, to prevent overfitting.
embed
=
fluid
.
embedding
(
input
=
data
,
size
=
[
vocab_size
,
embed_size
],
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Xavier
(),
regularizer
=
reg
),
is_sparse
=
True
)
return
embed
def
default_drnn
(
self
,
data
,
nf
,
is_reverse
,
h_0
):
return
fluid
.
layers
.
dynamic_gru
(
input
=
data
,
size
=
nf
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
self
.
default_normal_initializer
(
nf
),
regularizer
=
self
.
default_regularizer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.0
),
regularizer
=
self
.
default_regularizer
()),
is_reverse
=
is_reverse
,
h_0
=
h_0
)
def
fluid_sequence_pad
(
self
,
input
,
pad_value
,
maxlen
=
None
):
def
_fluid_sequence_pad
(
self
,
input
,
pad_value
,
maxlen
=
None
):
"""
args:
input: (batch*seq_len, dim)
...
...
@@ -121,7 +87,7 @@ class Model(ModelBase):
# TODO, maxlen=300, used to solve issues: https://github.com/PaddlePaddle/Paddle/issues/14164
return
input_padded
def
fluid_sequence_get_pos
(
self
,
lodtensor
):
def
_
fluid_sequence_get_pos
(
self
,
lodtensor
):
"""
args:
lodtensor: lod = [[0,4,7]]
...
...
@@ -134,7 +100,7 @@ class Model(ModelBase):
assert
lodtensor
.
shape
==
(
-
1
,
1
),
(
lodtensor
.
shape
())
ones
=
fluid
.
layers
.
cast
(
lodtensor
*
0
+
1
,
'float32'
)
# (batch*seq_len, 1)
ones_padded
=
self
.
fluid_sequence_pad
(
ones
,
ones_padded
=
self
.
_
fluid_sequence_pad
(
ones
,
0
)
# (batch, max_seq_len, 1)
ones_padded
=
fluid
.
layers
.
squeeze
(
ones_padded
,
[
2
])
# (batch, max_seq_len)
...
...
@@ -151,76 +117,106 @@ class Model(ModelBase):
return
pos
def
net
(
self
,
inputs
,
is_infer
=
False
):
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
,
None
,
self
.
_namespace
)
user_vocab
=
envs
.
get_global_env
(
"hyper_parameters.user_vocab"
,
None
,
self
.
_namespace
)
item_vocab
=
envs
.
get_global_env
(
"hyper_parameters.item_vocab"
,
None
,
self
.
_namespace
)
embed_size
=
envs
.
get_global_env
(
"hyper_parameters.embed_size"
,
None
,
self
.
_namespace
)
#encode
user_embedding
=
self
.
default_embedding
(
inputs
[
0
],
user_vocab
,
embed_size
)
user_feature
=
self
.
default_fc
(
data
=
user_embedding
,
size
=
hidden_size
,
num_flatten_dims
=
1
,
# user encode
user_embedding
=
fluid
.
embedding
(
input
=
inputs
[
0
],
size
=
[
self
.
user_vocab
,
self
.
embed_size
],
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Xavier
(),
regularizer
=
fluid
.
regularizer
.
L2Decay
(
1e-5
)),
is_sparse
=
True
)
user_feature
=
fluid
.
layers
.
fc
(
input
=
user_embedding
,
size
=
self
.
hidden_size
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
np
.
sqrt
(
1.0
/
self
.
hidden_size
))),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.0
)),
act
=
'relu'
,
name
=
'user_feature_fc'
)
# item encode
item_embedding
=
fluid
.
embedding
(
input
=
inputs
[
1
],
size
=
[
self
.
item_vocab
,
self
.
embed_size
],
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Xavier
(),
regularizer
=
fluid
.
regularizer
.
L2Decay
(
1e-5
)),
is_sparse
=
True
)
item_embedding
=
self
.
default_embedding
(
inputs
[
1
],
item_vocab
,
embed_size
)
item_embedding
=
fluid
.
layers
.
sequence_unpad
(
x
=
item_embedding
,
length
=
inputs
[
2
])
item_fc
=
self
.
default_fc
(
data
=
item_embedding
,
size
=
hidden_size
,
num_flatten_dims
=
1
,
item_fc
=
fluid
.
layers
.
fc
(
input
=
item_embedding
,
size
=
self
.
hidden_size
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
np
.
sqrt
(
1.0
/
self
.
hidden_size
))),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.0
)),
act
=
'relu'
,
name
=
'item_fc'
)
pos
=
self
.
fluid_sequence_get_pos
(
item_fc
)
pos_embed
=
self
.
default_embedding
(
pos
,
user_vocab
,
embed_size
)
pos
=
self
.
_fluid_sequence_get_pos
(
item_fc
)
pos_embed
=
fluid
.
embedding
(
input
=
pos
,
size
=
[
self
.
user_vocab
,
self
.
embed_size
],
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Xavier
(),
regularizer
=
fluid
.
regularizer
.
L2Decay
(
1e-5
)),
is_sparse
=
True
)
pos_embed
=
fluid
.
layers
.
squeeze
(
pos_embed
,
[
1
])
# item gru
gru_input
=
self
.
default_fc
(
data
=
fluid
.
layers
.
concat
([
item_fc
,
pos_embed
],
1
),
size
=
hidden_size
*
3
,
num_flatten_dims
=
1
,
act
=
'relu'
,
gru_input
=
fluid
.
layers
.
fc
(
input
=
fluid
.
layers
.
concat
([
item_fc
,
pos_embed
],
1
),
size
=
self
.
hidden_size
*
3
,
name
=
'item_gru_fc'
)
item_gru_forward
=
self
.
default_drnn
(
data
=
gru_input
,
nf
=
hidden_size
,
h_0
=
user_feature
,
is_reverse
=
False
)
# forward gru
item_gru_forward
=
fluid
.
layers
.
dynamic_gru
(
input
=
gru_input
,
size
=
self
.
hidden_size
,
is_reverse
=
False
,
h_0
=
user_feature
)
# backward gru
item_gru_backward
=
fluid
.
layers
.
dynamic_gru
(
input
=
gru_input
,
size
=
self
.
hidden_size
,
is_reverse
=
True
,
h_0
=
user_feature
)
item_gru_backward
=
self
.
default_drnn
(
data
=
gru_input
,
nf
=
hidden_size
,
h_0
=
user_feature
,
is_reverse
=
True
)
item_gru
=
fluid
.
layers
.
concat
(
[
item_gru_forward
,
item_gru_backward
],
axis
=
1
)
out_click_fc1
=
self
.
default_fc
(
data
=
item_gru
,
size
=
hidden_size
,
num_flatten_dims
=
1
,
out_click_fc1
=
fluid
.
layers
.
fc
(
input
=
item_gru
,
size
=
self
.
hidden_size
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
np
.
sqrt
(
1.0
/
self
.
hidden_size
))),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.0
)),
act
=
'relu'
,
name
=
'out_click_fc1'
)
click_prob
=
self
.
default_fc
(
data
=
out_click_fc1
,
click_prob
=
fluid
.
layers
.
fc
(
input
=
out_click_fc1
,
size
=
2
,
num_flatten_dims
=
1
,
act
=
'softmax'
,
name
=
'out_click_fc2'
)
labels
=
fluid
.
layers
.
sequence_unpad
(
x
=
inputs
[
3
],
length
=
inputs
[
2
])
auc_val
,
batch_auc
,
auc_states
=
fluid
.
layers
.
auc
(
input
=
click_prob
,
label
=
labels
)
if
is_infer
:
self
.
_infer_results
[
"AUC"
]
=
auc_val
return
loss
=
fluid
.
layers
.
reduce_mean
(
fluid
.
layers
.
cross_entropy
(
input
=
click_prob
,
label
=
labels
))
...
...
models/rerank/listwise/random_infer_reader.py
浏览文件 @
f49acc00
...
...
@@ -13,11 +13,12 @@
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
paddle.fluid
as
fluid
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.utils
import
envs
from
collections
import
defaultdict
import
paddle.fluid
as
fluid
import
numpy
as
np
class
EvaluateReader
(
Reader
):
...
...
models/rerank/listwise/random_reader.py
浏览文件 @
f49acc00
...
...
@@ -13,11 +13,12 @@
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
paddle.fluid
as
fluid
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.utils
import
envs
from
collections
import
defaultdict
import
paddle.fluid
as
fluid
import
numpy
as
np
class
TrainReader
(
Reader
):
...
...
models/rerank/readme.md
浏览文件 @
f49acc00
#
融合
模型库
#
重排序
模型库
## 简介
我们提供了常见的
多路排序融合
使用的模型算法的PaddleRec实现, 单机训练&预测效果指标以及分布式训练&预测性能指标等。目前实现的模型是
[
Listwise
](
listwise
)
。
我们提供了常见的
重排序
使用的模型算法的PaddleRec实现, 单机训练&预测效果指标以及分布式训练&预测性能指标等。目前实现的模型是
[
Listwise
](
listwise
)
。
模型算法库在持续添加中,欢迎关注。
## 目录
*
[
整体介绍
](
#整体介绍
)
*
[
融合模型列表
](
#融合
模型列表
)
*
[
重排序模型列表
](
#重排序
模型列表
)
*
[
使用教程
](
#使用教程
)
*
[
训练 预测
](
#训练
预测)
*
[
效果对比
](
#效果对比
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录