提交 f49acc00 编写于 作者: F frankwhzhang

fix listwise model style and readme

上级 44b8928d
......@@ -177,7 +177,7 @@ python -m paddlerec.run -m ./models/rank/dnn/config.yaml -b backend.yaml
| 多任务 | [ESMM](models/multitask/esmm/model.py) | ✓ | ✓ | ✓ |
| 多任务 | [MMOE](models/multitask/mmoe/model.py) | ✓ | ✓ | ✓ |
| 多任务 | [ShareBottom](models/multitask/share-bottom/model.py) | ✓ | ✓ | ✓ |
| 融合 | [Listwise](models/rerank/listwise/model.py) | ✓ | x | ✓ |
| 重排序 | [Listwise](models/rerank/listwise/model.py) | ✓ | x | ✓ |
......
......@@ -13,20 +13,28 @@
# limitations under the License.
import math
import numpy as np
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import numpy as np
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
self.item_len = envs.get_global_env("hyper_parameters.self.item_len",
None, self._namespace)
self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size",
None, self._namespace)
self.user_vocab = envs.get_global_env("hyper_parameters.user_vocab",
None, self._namespace)
self.item_vocab = envs.get_global_env("hyper_parameters.item_vocab",
None, self._namespace)
self.embed_size = envs.get_global_env("hyper_parameters.embed_size",
None, self._namespace)
def input_data(self, is_infer=False):
item_len = envs.get_global_env("hyper_parameters.item_len", None,
self._namespace)
user_slot_names = fluid.data(
name='user_slot_names',
shape=[None, 1],
......@@ -34,14 +42,18 @@ class Model(ModelBase):
lod_level=1)
item_slot_names = fluid.data(
name='item_slot_names',
shape=[None, item_len],
shape=[None, self.item_len],
dtype='int64',
lod_level=1)
lens = fluid.data(name='lens', shape=[None], dtype='int64')
labels = fluid.data(
name='labels', shape=[None, item_len], dtype='int64', lod_level=1)
name='labels',
shape=[None, self.item_len],
dtype='int64',
lod_level=1)
inputs = [user_slot_names] + [item_slot_names] + [lens] + [labels]
if is_infer:
self._infer_data_var = inputs
self._infer_data_loader = fluid.io.DataLoader.from_generator(
......@@ -59,53 +71,7 @@ class Model(ModelBase):
return inputs
    def default_normal_initializer(self, nf=128):
        """Return a TruncatedNormal initializer with stddev sqrt(1/nf).

        Args:
            nf: fan-in of the layer being initialized; larger fan-in
                yields a smaller stddev (default 128).
        """
        return fluid.initializer.TruncatedNormal(
            loc=0.0, scale=np.sqrt(1.0 / nf))
    def default_regularizer(self):
        """Default weight regularizer for FC/GRU params: none (returns None)."""
        return None
    def default_fc(self, data, size, num_flatten_dims=1, act=None, name=None):
        """Fully-connected layer with the model's default init/regularization.

        Weights use the truncated-normal initializer scaled by this layer's
        own `size` (fan-based); biases start at zero. Both use the default
        (None) regularizer.

        Args:
            data: input variable(s) for the FC layer.
            size: output dimensionality.
            num_flatten_dims: how many leading dims to flatten (fluid.layers.fc).
            act: optional activation name (e.g. 'relu', 'softmax').
            name: optional parameter-name prefix.
        """
        return fluid.layers.fc(
            input=data,
            size=size,
            num_flatten_dims=num_flatten_dims,
            param_attr=fluid.ParamAttr(
                initializer=self.default_normal_initializer(size),
                regularizer=self.default_regularizer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.0),
                regularizer=self.default_regularizer()),
            act=act,
            name=name)
    def default_embedding(self, data, vocab_size, embed_size):
        """Sparse embedding lookup with Xavier init and L2 decay.

        Args:
            data: int64 id tensor to look up.
            vocab_size: number of rows in the embedding table.
            embed_size: embedding dimensionality.

        Returns:
            The embedded representation of `data`.
        """
        reg = fluid.regularizer.L2Decay(
            1e-5)  # IMPORTANT, to prevent overfitting.
        embed = fluid.embedding(
            input=data,
            size=[vocab_size, embed_size],
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Xavier(), regularizer=reg),
            is_sparse=True)
        return embed
    def default_drnn(self, data, nf, is_reverse, h_0):
        """Dynamic GRU over a LoD sequence with the model's default init.

        Args:
            data: GRU input; per fluid.layers.dynamic_gru this must already
                be projected to 3*nf width.
            nf: hidden size of the GRU.
            is_reverse: run the recurrence backwards when True.
            h_0: initial hidden state (here, the user feature).
        """
        return fluid.layers.dynamic_gru(
            input=data,
            size=nf,
            param_attr=fluid.ParamAttr(
                initializer=self.default_normal_initializer(nf),
                regularizer=self.default_regularizer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.0),
                regularizer=self.default_regularizer()),
            is_reverse=is_reverse,
            h_0=h_0)
def fluid_sequence_pad(self, input, pad_value, maxlen=None):
def _fluid_sequence_pad(self, input, pad_value, maxlen=None):
"""
args:
input: (batch*seq_len, dim)
......@@ -121,7 +87,7 @@ class Model(ModelBase):
# TODO, maxlen=300, used to solve issues: https://github.com/PaddlePaddle/Paddle/issues/14164
return input_padded
def fluid_sequence_get_pos(self, lodtensor):
def _fluid_sequence_get_pos(self, lodtensor):
"""
args:
lodtensor: lod = [[0,4,7]]
......@@ -134,8 +100,8 @@ class Model(ModelBase):
assert lodtensor.shape == (-1, 1), (lodtensor.shape())
ones = fluid.layers.cast(lodtensor * 0 + 1,
'float32') # (batch*seq_len, 1)
ones_padded = self.fluid_sequence_pad(ones,
0) # (batch, max_seq_len, 1)
ones_padded = self._fluid_sequence_pad(ones,
0) # (batch, max_seq_len, 1)
ones_padded = fluid.layers.squeeze(ones_padded,
[2]) # (batch, max_seq_len)
seq_len = fluid.layers.cast(
......@@ -151,76 +117,106 @@ class Model(ModelBase):
return pos
def net(self, inputs, is_infer=False):
hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None,
self._namespace)
user_vocab = envs.get_global_env("hyper_parameters.user_vocab", None,
self._namespace)
item_vocab = envs.get_global_env("hyper_parameters.item_vocab", None,
self._namespace)
embed_size = envs.get_global_env("hyper_parameters.embed_size", None,
self._namespace)
#encode
user_embedding = self.default_embedding(inputs[0], user_vocab,
embed_size)
user_feature = self.default_fc(
data=user_embedding,
size=hidden_size,
num_flatten_dims=1,
# user encode
user_embedding = fluid.embedding(
input=inputs[0],
size=[self.user_vocab, self.embed_size],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Xavier(),
regularizer=fluid.regularizer.L2Decay(1e-5)),
is_sparse=True)
user_feature = fluid.layers.fc(
input=user_embedding,
size=self.hidden_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=np.sqrt(1.0 / self.hidden_size))),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(
value=0.0)),
act='relu',
name='user_feature_fc')
# item encode
item_embedding = fluid.embedding(
input=inputs[1],
size=[self.item_vocab, self.embed_size],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Xavier(),
regularizer=fluid.regularizer.L2Decay(1e-5)),
is_sparse=True)
item_embedding = self.default_embedding(inputs[1], item_vocab,
embed_size)
item_embedding = fluid.layers.sequence_unpad(
x=item_embedding, length=inputs[2])
item_fc = self.default_fc(
data=item_embedding,
size=hidden_size,
num_flatten_dims=1,
item_fc = fluid.layers.fc(
input=item_embedding,
size=self.hidden_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=np.sqrt(1.0 / self.hidden_size))),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(
value=0.0)),
act='relu',
name='item_fc')
pos = self.fluid_sequence_get_pos(item_fc)
pos_embed = self.default_embedding(pos, user_vocab, embed_size)
pos = self._fluid_sequence_get_pos(item_fc)
pos_embed = fluid.embedding(
input=pos,
size=[self.user_vocab, self.embed_size],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Xavier(),
regularizer=fluid.regularizer.L2Decay(1e-5)),
is_sparse=True)
pos_embed = fluid.layers.squeeze(pos_embed, [1])
# item gru
gru_input = self.default_fc(
data=fluid.layers.concat([item_fc, pos_embed], 1),
size=hidden_size * 3,
num_flatten_dims=1,
act='relu',
gru_input = fluid.layers.fc(
input=fluid.layers.concat([item_fc, pos_embed], 1),
size=self.hidden_size * 3,
name='item_gru_fc')
item_gru_forward = self.default_drnn(
data=gru_input, nf=hidden_size, h_0=user_feature, is_reverse=False)
# forward gru
item_gru_forward = fluid.layers.dynamic_gru(
input=gru_input,
size=self.hidden_size,
is_reverse=False,
h_0=user_feature)
# backward gru
item_gru_backward = fluid.layers.dynamic_gru(
input=gru_input,
size=self.hidden_size,
is_reverse=True,
h_0=user_feature)
item_gru_backward = self.default_drnn(
data=gru_input, nf=hidden_size, h_0=user_feature, is_reverse=True)
item_gru = fluid.layers.concat(
[item_gru_forward, item_gru_backward], axis=1)
out_click_fc1 = self.default_fc(
data=item_gru,
size=hidden_size,
num_flatten_dims=1,
out_click_fc1 = fluid.layers.fc(
input=item_gru,
size=self.hidden_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormal(
loc=0.0, scale=np.sqrt(1.0 / self.hidden_size))),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(
value=0.0)),
act='relu',
name='out_click_fc1')
click_prob = self.default_fc(
data=out_click_fc1,
size=2,
num_flatten_dims=1,
act='softmax',
name='out_click_fc2')
click_prob = fluid.layers.fc(input=out_click_fc1,
size=2,
act='softmax',
name='out_click_fc2')
labels = fluid.layers.sequence_unpad(x=inputs[3], length=inputs[2])
auc_val, batch_auc, auc_states = fluid.layers.auc(input=click_prob,
label=labels)
if is_infer:
self._infer_results["AUC"] = auc_val
return
loss = fluid.layers.reduce_mean(
fluid.layers.cross_entropy(
input=click_prob, label=labels))
......
......@@ -13,11 +13,12 @@
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict
import paddle.fluid as fluid
import numpy as np
class EvaluateReader(Reader):
......
......@@ -13,11 +13,12 @@
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict
import paddle.fluid as fluid
import numpy as np
class TrainReader(Reader):
......
# 融合模型库
# 重排序模型库
## 简介
我们提供了常见的多路排序融合使用的模型算法的PaddleRec实现, 单机训练&预测效果指标以及分布式训练&预测性能指标等。目前实现的模型是 [Listwise](listwise)
我们提供了常见的重排序使用的模型算法的PaddleRec实现, 单机训练&预测效果指标以及分布式训练&预测性能指标等。目前实现的模型是 [Listwise](listwise)
模型算法库在持续添加中,欢迎关注。
## 目录
* [整体介绍](#整体介绍)
* [融合模型列表](#融合模型列表)
* [重排序模型列表](#重排序模型列表)
* [使用教程](#使用教程)
* [训练 预测](#训练 预测)
* [效果对比](#效果对比)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册