Unverified commit 5a6abdf6, authored by wuzhihua, committed by GitHub

Merge pull request #15 from frankwhzhang/master

fix model for new version
doc/imgs/overview.png (binary image replaced: 698.6 KB → 217.7 KB)
models/multitask/esmm/config.yaml
@@ -12,40 +12,55 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-evaluate:
-  reader:
-    batch_size: 1
-    class: "{workspace}/esmm_infer_reader.py"
-    test_data_path: "{workspace}/data/train"
-
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
-
-  epochs: 3
-  workspace: "paddlerec.models.multitask.esmm"
-  device: cpu
-
-  reader:
-    batch_size: 2
-    class: "{workspace}/esmm_reader.py"
-    train_data_path: "{workspace}/data/train"
-
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      vocab_size: 10000
-      embed_size: 128
-      learning_rate: 0.001
-      optimizer: adam
-
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+workspace: "paddlerec.models.multitask.esmm"
+
+dataset:
+- name: dataset_train
+  batch_size: 1
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/esmm_reader.py"
+- name: dataset_infer
+  batch_size: 1
+  type: QueueDataset
+  data_path: "{workspace}/data/test"
+  data_converter: "{workspace}/esmm_reader.py"
+
+hyper_parameters:
+  vocab_size: 10000
+  embed_size: 128
+  optimizer:
+    class: adam
+    learning_rate: 0.001
+    strategy: async
+
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+  print_interval: 10
+- name: infer_runner
+  class: single_infer
+  init_model_path: "increment/0"
+  device: cpu
+  epochs: 3
+
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
+#- name: infer
+#  model: "{workspace}/model.py"
+#  dataset_name: dataset_infer
+#  thread_num: 1
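Reviewer note: this config rewrite is representative of every config.yaml in the PR (the old train/evaluate trees become flat workspace/dataset/hyper_parameters/runner/phase sections). A minimal sketch, assuming PyYAML and a checkout-relative path (both assumptions, not part of this PR), for sanity-checking the new layout:

```python
# Load the rewritten config and print the sections the new trainer dispatches on.
import yaml  # assumes PyYAML is installed

with open("models/multitask/esmm/config.yaml") as f:  # hypothetical path
    conf = yaml.safe_load(f)

print(conf["workspace"])                     # paddlerec.models.multitask.esmm
print([d["name"] for d in conf["dataset"]])  # ['dataset_train', 'dataset_infer']
print([r["name"] for r in conf["runner"]])   # ['train_runner', 'infer_runner']
print(conf["mode"])                          # train_runner
```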
models/multitask/esmm/esmm_infer_reader.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

from collections import defaultdict

from paddlerec.core.reader import Reader


class EvaluateReader(Reader):
    def init(self):
        all_field_id = [
            '101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124',
            '125', '126', '127', '128', '129', '205', '206', '207', '210',
            '216', '508', '509', '702', '853', '301'
        ]
        self.all_field_id_dict = defaultdict(int)
        for i, field_id in enumerate(all_field_id):
            self.all_field_id_dict[field_id] = [False, i]

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def reader():
            """
            This function needs to be implemented by the user, based on data format
            """
            features = line.strip().split(',')
            ctr = int(features[1])
            cvr = int(features[2])
            padding = 0
            output = [(field_id, []) for field_id in self.all_field_id_dict]
            for elem in features[4:]:
                field_id, feat_id = elem.strip().split(':')
                if field_id not in self.all_field_id_dict:
                    continue
                self.all_field_id_dict[field_id][0] = True
                index = self.all_field_id_dict[field_id][1]
                output[index][1].append(int(feat_id))
            for field_id in self.all_field_id_dict:
                visited, index = self.all_field_id_dict[field_id]
                if visited:
                    self.all_field_id_dict[field_id][0] = False
                else:
                    output[index][1].append(padding)
            output.append(('ctr', [ctr]))
            output.append(('cvr', [cvr]))
            yield output

        return reader
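A standalone sketch of the parsing logic above, runnable without PaddleRec. The sample line and the trimmed field list are made up to match what the reader expects (column 1 = click/ctr, column 2 = buy/cvr, columns 4+ = "field_id:feat_id" pairs):

```python
from collections import defaultdict

line = "1,1,0,x,101:32,109_14:15,205:7"   # made-up sample record
field_ids = ['101', '109_14', '205']       # trimmed field list for the sketch

field_index = {fid: i for i, fid in enumerate(field_ids)}
features = line.strip().split(',')
ctr, cvr = int(features[1]), int(features[2])

output = [(fid, []) for fid in field_ids]
for elem in features[4:]:
    fid, feat_id = elem.strip().split(':')
    if fid in field_index:
        output[field_index[fid]][1].append(int(feat_id))
# pad fields that never appeared so every slot yields at least one id
for fid, feats in output:
    if not feats:
        feats.append(0)
output.append(('ctr', [ctr]))
output.append(('cvr', [cvr]))
print(output)  # [('101', [32]), ('109_14', [15]), ('205', [7]), ('ctr', [1]), ('cvr', [0])]
```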
models/multitask/esmm/esmm_reader.py
@@ -40,8 +40,6 @@ class TrainReader(Reader):
             This function needs to be implemented by the user, based on data format
             """
             features = line.strip().split(',')
-            # ctr = list(map(int, features[1]))
-            # cvr = list(map(int, features[2]))
             ctr = int(features[1])
             cvr = int(features[2])
@@ -54,7 +52,6 @@ class TrainReader(Reader):
                     continue
                 self.all_field_id_dict[field_id][0] = True
                 index = self.all_field_id_dict[field_id][1]
-                # feat_id = list(map(int, feat_id))
                 output[index][1].append(int(feat_id))
             for field_id in self.all_field_id_dict:
...
models/multitask/esmm/model.py
@@ -23,28 +23,11 @@ class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)

-    def fc(self, tag, data, out_dim, active='prelu'):
-        init_stddev = 1.0
-        scales = 1.0 / np.sqrt(data.shape[1])
-
-        p_attr = fluid.param_attr.ParamAttr(
-            name='%s_weight' % tag,
-            initializer=fluid.initializer.NormalInitializer(
-                loc=0.0, scale=init_stddev * scales))
-
-        b_attr = fluid.ParamAttr(
-            name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
-
-        out = fluid.layers.fc(input=data,
-                              size=out_dim,
-                              act=active,
-                              param_attr=p_attr,
-                              bias_attr=b_attr,
-                              name=tag)
-        return out
-
-    def input_data(self):
+    def _init_hyper_parameters(self):
+        self.vocab_size = envs.get_global_env("hyper_parameters.vocab_size")
+        self.embed_size = envs.get_global_env("hyper_parameters.embed_size")
+
+    def input_data(self, is_infer=False, **kwargs):
         sparse_input_ids = [
             fluid.data(
                 name="field_" + str(i),
@@ -55,26 +38,24 @@ class Model(ModelBase):
         label_ctr = fluid.data(name="ctr", shape=[-1, 1], dtype="int64")
         label_cvr = fluid.data(name="cvr", shape=[-1, 1], dtype="int64")
         inputs = sparse_input_ids + [label_ctr] + [label_cvr]
-        self._data_var.extend(inputs)
-
-        return inputs
+        if is_infer:
+            return inputs
+        else:
+            return inputs

     def net(self, inputs, is_infer=False):
-        vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None,
-                                         self._namespace)
-        embed_size = envs.get_global_env("hyper_parameters.embed_size", None,
-                                         self._namespace)
         emb = []
+        # input feature data
         for data in inputs[0:-2]:
             feat_emb = fluid.embedding(
                 input=data,
-                size=[vocab_size, embed_size],
+                size=[self.vocab_size, self.embed_size],
                 param_attr=fluid.ParamAttr(
                     name='dis_emb',
                     learning_rate=5,
                     initializer=fluid.initializer.Xavier(
-                        fan_in=embed_size, fan_out=embed_size)),
+                        fan_in=self.embed_size, fan_out=self.embed_size)),
                 is_sparse=True)
             field_emb = fluid.layers.sequence_pool(
                 input=feat_emb, pool_type='sum')
@@ -83,14 +64,14 @@ class Model(ModelBase):
         # ctr
         active = 'relu'
-        ctr_fc1 = self.fc('ctr_fc1', concat_emb, 200, active)
-        ctr_fc2 = self.fc('ctr_fc2', ctr_fc1, 80, active)
-        ctr_out = self.fc('ctr_out', ctr_fc2, 2, 'softmax')
+        ctr_fc1 = self._fc('ctr_fc1', concat_emb, 200, active)
+        ctr_fc2 = self._fc('ctr_fc2', ctr_fc1, 80, active)
+        ctr_out = self._fc('ctr_out', ctr_fc2, 2, 'softmax')

         # cvr
-        cvr_fc1 = self.fc('cvr_fc1', concat_emb, 200, active)
-        cvr_fc2 = self.fc('cvr_fc2', cvr_fc1, 80, active)
-        cvr_out = self.fc('cvr_out', cvr_fc2, 2, 'softmax')
+        cvr_fc1 = self._fc('cvr_fc1', concat_emb, 200, active)
+        cvr_fc2 = self._fc('cvr_fc2', cvr_fc1, 80, active)
+        cvr_out = self._fc('cvr_out', cvr_fc2, 2, 'softmax')

         ctr_clk = inputs[-2]
         ctcvr_buy = inputs[-1]
@@ -127,15 +108,23 @@ class Model(ModelBase):
         self._metrics["AUC_ctcvr"] = auc_ctcvr
         self._metrics["BATCH_AUC_ctcvr"] = batch_auc_ctcvr

-    def train_net(self):
-        input_data = self.input_data()
-        self.net(input_data)
-
-    def infer_net(self):
-        self._infer_data_var = self.input_data()
-        self._infer_data_loader = fluid.io.DataLoader.from_generator(
-            feed_list=self._infer_data_var,
-            capacity=64,
-            use_double_buffer=False,
-            iterable=False)
-        self.net(self._infer_data_var, is_infer=True)
+    def _fc(self, tag, data, out_dim, active='prelu'):
+        init_stddev = 1.0
+        scales = 1.0 / np.sqrt(data.shape[1])
+
+        p_attr = fluid.param_attr.ParamAttr(
+            name='%s_weight' % tag,
+            initializer=fluid.initializer.NormalInitializer(
+                loc=0.0, scale=init_stddev * scales))
+
+        b_attr = fluid.ParamAttr(
+            name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
+
+        out = fluid.layers.fc(input=data,
+                              size=out_dim,
+                              act=active,
+                              param_attr=p_attr,
+                              bias_attr=b_attr,
+                              name=tag)
+        return out
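Reviewer note: a numpy sketch of the decomposition ESMM uses and that the two towers above feed: pCTCVR = pCTR × pCVR, so CVR is supervised implicitly over the whole exposure space through the click and click&buy labels. The values are made up:

```python
import numpy as np

p_ctr = np.array([0.10, 0.30, 0.05])  # positive-class column of ctr_out
p_cvr = np.array([0.20, 0.50, 0.40])  # positive-class column of cvr_out
p_ctcvr = p_ctr * p_cvr               # elementwise product supervised by the "buy" label
print(p_ctcvr)                        # [0.02 0.15 0.02]
```

Because both factors come from softmax outputs, p_ctcvr stays in [0, 1] with no extra clipping.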
models/multitask/mmoe/census_infer_reader.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

from paddlerec.core.reader import Reader


class EvaluateReader(Reader):
    def init(self):
        pass

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def reader():
            """
            This function needs to be implemented by the user, based on data format
            """
            l = line.strip().split(',')
            l = list(map(float, l))
            label_income = []
            label_marital = []
            data = l[2:]
            if int(l[1]) == 0:
                label_income = [1, 0]
            elif int(l[1]) == 1:
                label_income = [0, 1]
            if int(l[0]) == 0:
                label_marital = [1, 0]
            elif int(l[0]) == 1:
                label_marital = [0, 1]
            feature_name = ["input", "label_income", "label_marital"]
            yield zip(feature_name, [data] + [label_income] + [label_marital])

        return reader
models/multitask/mmoe/config.yaml
@@ -12,43 +12,57 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-evaluate:
-  reader:
-    batch_size: 1
-    class: "{workspace}/census_infer_reader.py"
-    test_data_path: "{workspace}/data/train"
-
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
-
-  epochs: 3
-  workspace: "paddlerec.models.multitask.mmoe"
-  device: cpu
-
-  reader:
-    batch_size: 1
-    class: "{workspace}/census_reader.py"
-    train_data_path: "{workspace}/data/train"
-
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      feature_size: 499
-      expert_num: 8
-      gate_num: 2
-      expert_size: 16
-      tower_size: 8
-      learning_rate: 0.001
-      optimizer: adam
-
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+workspace: "paddlerec.models.multitask.mmoe"
+
+dataset:
+- name: dataset_train
+  batch_size: 1
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/census_reader.py"
+- name: dataset_infer
+  batch_size: 1
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/census_reader.py"
+
+hyper_parameters:
+  feature_size: 499
+  expert_num: 8
+  gate_num: 2
+  expert_size: 16
+  tower_size: 8
+  optimizer:
+    class: adam
+    learning_rate: 0.001
+    strategy: async
+
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+  print_interval: 10
+- name: infer_runner
+  class: single_infer
+  init_model_path: "increment/0"
+  device: cpu
+  epochs: 3
+
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
+#- name: infer
+#  model: "{workspace}/model.py"
+#  dataset_name: dataset_infer
+#  thread_num: 1
models/multitask/mmoe/data/run.sh

mkdir train_data
mkdir test_data
mkdir data
train_path="data/census-income.data"
test_path="data/census-income.test"
train_data_path="train_data/"
test_data_path="test_data/"
pip install -r requirements.txt

wget -P data/ https://archive.ics.uci.edu/ml/machine-learning-databases/census-income-mld/census.tar.gz
tar -zxvf data/census.tar.gz -C data/
python data_preparation.py --train_path ${train_path} \
                           --test_path ${test_path} \
                           --train_data_path ${train_data_path} \
                           --test_data_path ${test_data_path}
models/multitask/mmoe/model.py
@@ -22,53 +22,51 @@ class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)

-    def MMOE(self, is_infer=False):
-        feature_size = envs.get_global_env("hyper_parameters.feature_size",
-                                           None, self._namespace)
-        expert_num = envs.get_global_env("hyper_parameters.expert_num", None,
-                                         self._namespace)
-        gate_num = envs.get_global_env("hyper_parameters.gate_num", None,
-                                       self._namespace)
-        expert_size = envs.get_global_env("hyper_parameters.expert_size", None,
-                                          self._namespace)
-        tower_size = envs.get_global_env("hyper_parameters.tower_size", None,
-                                         self._namespace)
-
-        input_data = fluid.data(
-            name="input", shape=[-1, feature_size], dtype="float32")
+    def _init_hyper_parameters(self):
+        self.feature_size = envs.get_global_env(
+            "hyper_parameters.feature_size")
+        self.expert_num = envs.get_global_env("hyper_parameters.expert_num")
+        self.gate_num = envs.get_global_env("hyper_parameters.gate_num")
+        self.expert_size = envs.get_global_env("hyper_parameters.expert_size")
+        self.tower_size = envs.get_global_env("hyper_parameters.tower_size")
+
+    def input_data(self, is_infer=False, **kwargs):
+        inputs = fluid.data(
+            name="input", shape=[-1, self.feature_size], dtype="float32")
         label_income = fluid.data(
             name="label_income", shape=[-1, 2], dtype="float32", lod_level=0)
         label_marital = fluid.data(
             name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0)

         if is_infer:
-            self._infer_data_var = [input_data, label_income, label_marital]
-            self._infer_data_loader = fluid.io.DataLoader.from_generator(
-                feed_list=self._infer_data_var,
-                capacity=64,
-                use_double_buffer=False,
-                iterable=False)
-
-        self._data_var.extend([input_data, label_income, label_marital])
+            return [inputs, label_income, label_marital]
+        else:
+            return [inputs, label_income, label_marital]
+
+    def net(self, inputs, is_infer=False):
+        input_data = inputs[0]
+        label_income = inputs[1]
+        label_marital = inputs[2]

         # f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
         expert_outputs = []
-        for i in range(0, expert_num):
+        for i in range(0, self.expert_num):
             expert_output = fluid.layers.fc(
                 input=input_data,
-                size=expert_size,
+                size=self.expert_size,
                 act='relu',
                 bias_attr=fluid.ParamAttr(learning_rate=1.0),
                 name='expert_' + str(i))
             expert_outputs.append(expert_output)

         expert_concat = fluid.layers.concat(expert_outputs, axis=1)
-        expert_concat = fluid.layers.reshape(expert_concat,
-                                             [-1, expert_num, expert_size])
+        expert_concat = fluid.layers.reshape(
+            expert_concat, [-1, self.expert_num, self.expert_size])

         # g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
         output_layers = []
-        for i in range(0, gate_num):
+        for i in range(0, self.gate_num):
             cur_gate = fluid.layers.fc(
                 input=input_data,
-                size=expert_num,
+                size=self.expert_num,
                 act='softmax',
                 bias_attr=fluid.ParamAttr(learning_rate=1.0),
                 name='gate_' + str(i))
@@ -78,7 +76,7 @@ class Model(ModelBase):
             cur_gate_expert = fluid.layers.reduce_sum(cur_gate_expert, dim=1)
             # Build tower layer
             cur_tower = fluid.layers.fc(input=cur_gate_expert,
-                                        size=tower_size,
+                                        size=self.tower_size,
                                         act='relu',
                                         name='task_layer_' + str(i))
             out = fluid.layers.fc(input=cur_tower,
@@ -127,8 +125,5 @@ class Model(ModelBase):
         self._metrics["AUC_marital"] = auc_marital
         self._metrics["BATCH_AUC_marital"] = batch_auc_2

-    def train_net(self):
-        self.MMOE()
-
     def infer_net(self):
-        self.MMOE(is_infer=True)
+        pass
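Reviewer note: a numpy sketch of the mixture net() computes for one task k, y_k = Σ_i g_i^k(x) · f_i(x) with softmax gates. Shapes follow the config; the random weights are stand-ins, not trained parameters:

```python
import numpy as np

rng = np.random.default_rng(0)
batch, feature_size = 4, 499        # feature_size from config.yaml
expert_num, expert_size = 8, 16     # expert_num / expert_size from config.yaml

x = rng.normal(size=(batch, feature_size))

# f_i(x): one ReLU layer per expert, stacked to (batch, expert_num, expert_size)
W_expert = rng.normal(size=(expert_num, feature_size, expert_size))
experts = np.maximum(np.einsum('bf,efs->bes', x, W_expert), 0)

# g^k(x): softmax gate over experts for one task k (max-subtracted for stability)
gate_logits = x @ (0.01 * rng.normal(size=(feature_size, expert_num)))
gate_logits -= gate_logits.max(axis=1, keepdims=True)
gate = np.exp(gate_logits) / np.exp(gate_logits).sum(axis=1, keepdims=True)

# weighted sum over experts -> tower input, matching reduce_sum(dim=1) in net()
tower_input = (gate[:, :, None] * experts).sum(axis=1)
print(tower_input.shape)  # (4, 16)
```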
models/multitask/readme.md
@@ -9,7 +9,9 @@
 * [Overview](#整体介绍)
     * [Multi-task model list](#多任务模型列表)
 * [Tutorial](#使用教程)
-    * [Training & prediction](#训练&预测)
+    * [Data preparation](#数据处理)
+    * [Training](#训练)
+    * [Prediction](#预测)
 * [Benchmarks](#效果对比)
     * [Model benchmark list](#模型效果列表)
@@ -40,14 +42,49 @@
 <img align="center" src="../../doc/imgs/mmoe.png">
 <p>

-## Tutorial
-### Training & prediction
+## Tutorial (quick start)
 ```shell
 python -m paddlerec.run -m paddlerec.models.multitask.mmoe # mmoe
 python -m paddlerec.run -m paddlerec.models.multitask.share-bottom # share-bottom
 python -m paddlerec.run -m paddlerec.models.multitask.esmm # esmm
 ```
+
+## Tutorial (reproducing the paper results)
+### Note
+To let users run every model quickly, each model ships with sample data, and hyperparameters such as batch_size are tuned so that the training & test logs read well on that sample data. To reproduce the results in this README, adjust batch_size and the other hyperparameters according to the table below, and use the provided scripts to download and preprocess the corresponding datasets.
+
+| Model        | batch_size | thread_num | epoch_num |
+| :----------: | :--------: | :--------: | :-------: |
+| Share-Bottom | 32         | 1          | 400       |
+| MMoE         | 32         | 1          | 400       |
+| ESMM         | 64         | 2          | 100       |
+
+### Data preparation
+See the data download & preprocessing script in each model's directory:
+```
+sh run.sh
+```
+
+### Training
+```
+cd models/multitask/mmoe # enter the chosen model's directory, MMoE as an example
+python -m paddlerec.run -m ./config.yaml # after customizing hyperparameters, point at the config file to use the custom configuration
+```
+
+### Prediction
+```
+# in the model's config.yaml, set workspace to the absolute path of the current directory
+# in the model's config.yaml, set mode to infer_runner
+# example: mode: train_runner -> mode: infer_runner
+# in infer_runner, set class to class: single_infer
+# switch the phase section to the infer configuration, following the comments in the config
+# after editing config.yaml, run:
+python -m paddlerec.run -m ./config.yaml # MMoE as an example
+```
+
 ## Benchmarks
 ### Model benchmark list
...
models/multitask/share-bottom/census_infer_reader.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

from paddlerec.core.reader import Reader


class EvaluateReader(Reader):
    def init(self):
        pass

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def reader():
            """
            This function needs to be implemented by the user, based on data format
            """
            l = line.strip().split(',')
            l = list(map(float, l))
            label_income = []
            label_marital = []
            data = l[2:]
            if int(l[1]) == 0:
                label_income = [1, 0]
            elif int(l[1]) == 1:
                label_income = [0, 1]
            if int(l[0]) == 0:
                label_marital = [1, 0]
            elif int(l[0]) == 1:
                label_marital = [0, 1]
            feature_name = ["input", "label_income", "label_marital"]
            yield zip(feature_name, [data] + [label_income] + [label_marital])

        return reader
models/multitask/share-bottom/config.yaml
@@ -12,42 +12,56 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-evaluate:
-  reader:
-    batch_size: 1
-    class: "{workspace}/census_infer_reader.py"
-    test_data_path: "{workspace}/data/train"
-
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
-
-  epochs: 3
-  workspace: "paddlerec.models.multitask.share-bottom"
-  device: cpu
-
-  reader:
-    batch_size: 2
-    class: "{workspace}/census_reader.py"
-    train_data_path: "{workspace}/data/train"
-
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      feature_size: 499
-      bottom_size: 117
-      tower_nums: 2
-      tower_size: 8
-      learning_rate: 0.001
-      optimizer: adam
-
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+workspace: "paddlerec.models.multitask.share-bottom"
+
+dataset:
+- name: dataset_train
+  batch_size: 1
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/census_reader.py"
+- name: dataset_infer
+  batch_size: 1
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/census_reader.py"
+
+hyper_parameters:
+  feature_size: 499
+  bottom_size: 117
+  tower_nums: 2
+  tower_size: 8
+  optimizer:
+    class: adam
+    learning_rate: 0.001
+    strategy: async
+
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+  print_interval: 5
+- name: infer_runner
+  class: single_infer
+  init_model_path: "increment/0"
+  device: cpu
+  epochs: 3
+
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
+#- name: infer
+#  model: "{workspace}/model.py"
+#  dataset_name: dataset_infer
+#  thread_num: 1
models/multitask/share-bottom/model.py
@@ -22,46 +22,42 @@ class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)

-    def model(self, is_infer=False):
-
-        feature_size = envs.get_global_env("hyper_parameters.feature_size",
-                                           None, self._namespace)
-        bottom_size = envs.get_global_env("hyper_parameters.bottom_size", None,
-                                          self._namespace)
-        tower_size = envs.get_global_env("hyper_parameters.tower_size", None,
-                                         self._namespace)
-        tower_nums = envs.get_global_env("hyper_parameters.tower_nums", None,
-                                         self._namespace)
-
-        input_data = fluid.data(
-            name="input", shape=[-1, feature_size], dtype="float32")
+    def _init_hyper_parameters(self):
+        self.feature_size = envs.get_global_env(
+            "hyper_parameters.feature_size")
+        self.bottom_size = envs.get_global_env("hyper_parameters.bottom_size")
+        self.tower_size = envs.get_global_env("hyper_parameters.tower_size")
+        self.tower_nums = envs.get_global_env("hyper_parameters.tower_nums")
+
+    def input_data(self, is_infer=False, **kwargs):
+        inputs = fluid.data(
+            name="input", shape=[-1, self.feature_size], dtype="float32")
         label_income = fluid.data(
             name="label_income", shape=[-1, 2], dtype="float32", lod_level=0)
         label_marital = fluid.data(
             name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0)

         if is_infer:
-            self._infer_data_var = [input_data, label_income, label_marital]
-            self._infer_data_loader = fluid.io.DataLoader.from_generator(
-                feed_list=self._infer_data_var,
-                capacity=64,
-                use_double_buffer=False,
-                iterable=False)
-
-        self._data_var.extend([input_data, label_income, label_marital])
+            return [inputs, label_income, label_marital]
+        else:
+            return [inputs, label_income, label_marital]
+
+    def net(self, inputs, is_infer=False):
+        input_data = inputs[0]
+        label_income = inputs[1]
+        label_marital = inputs[2]

         bottom_output = fluid.layers.fc(
             input=input_data,
-            size=bottom_size,
+            size=self.bottom_size,
             act='relu',
             bias_attr=fluid.ParamAttr(learning_rate=1.0),
             name='bottom_output')

         # Build tower layer from bottom layer
         output_layers = []
-        for index in range(tower_nums):
+        for index in range(self.tower_nums):
             tower_layer = fluid.layers.fc(input=bottom_output,
-                                          size=tower_size,
+                                          size=self.tower_size,
                                           act='relu',
                                           name='task_layer_' + str(index))
             output_layer = fluid.layers.fc(input=tower_layer,
@@ -107,9 +103,3 @@ class Model(ModelBase):
         self._metrics["BATCH_AUC_income"] = batch_auc_1
         self._metrics["AUC_marital"] = auc_marital
         self._metrics["BATCH_AUC_marital"] = batch_auc_2
-
-    def train_net(self):
-        self.model()
-
-    def infer_net(self):
-        self.model(is_infer=True)
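Reviewer note: the contrast with MMoE in a few lines of numpy. Shared-Bottom feeds every task tower the same bottom representation, with no per-task gating; shapes follow the config, weights are made up:

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(4, 499))                # feature_size: 499
W_bottom = rng.normal(size=(499, 117))       # bottom_size: 117
bottom = np.maximum(x @ W_bottom, 0)         # one shared ReLU bottom

towers = []
for _ in range(2):                           # tower_nums: 2
    W_tower = rng.normal(size=(117, 8))      # tower_size: 8
    towers.append(np.maximum(bottom @ W_tower, 0))
print([t.shape for t in towers])             # [(4, 8), (4, 8)]
```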
models/recall/gru4rec/config.yaml
@@ -12,31 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-evaluate:
-  reader:
-    batch_size: 1
-    class: "{workspace}/rsc15_infer_reader.py"
-    test_data_path: "{workspace}/data/train"
-    is_return_numpy: False
-
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
-
-  epochs: 3
-  workspace: "paddlerec.models.recall.gru4rec"
-  device: cpu
-
-  reader:
-    batch_size: 5
-    class: "{workspace}/rsc15_reader.py"
-    train_data_path: "{workspace}/data/train"
-
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
+workspace: "paddlerec.models.recall.gru4rec"
+
+dataset:
+- name: dataset_train
+  batch_size: 5
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/rsc15_reader.py"
+- name: dataset_infer
+  batch_size: 5
+  type: QueueDataset
+  data_path: "{workspace}/data/test"
+  data_converter: "{workspace}/rsc15_reader.py"
+
+hyper_parameters:
   vocab_size: 1000
   hid_size: 100
   emb_lr_x: 10.0
@@ -44,15 +34,37 @@
   fc_lr_x: 1.0
   init_low_bound: -0.04
   init_high_bound: 0.04
-  learning_rate: 0.01
-  optimizer: adagrad
-
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+  optimizer:
+    class: adagrad
+    learning_rate: 0.01
+    strategy: async
+
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+  print_interval: 10
+- name: infer_runner
+  class: single_infer
+  init_model_path: "increment/0"
+  device: cpu
+  epochs: 3
+
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
+#- name: infer
+#  model: "{workspace}/model.py"
+#  dataset_name: dataset_infer
+#  thread_num: 1
models/recall/gru4rec/model.py
@@ -22,84 +22,72 @@ class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)

-    def all_vocab_network(self, is_infer=False):
-        """ network definition """
-        recall_k = envs.get_global_env("hyper_parameters.recall_k", None,
-                                       self._namespace)
-        vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None,
-                                         self._namespace)
-        hid_size = envs.get_global_env("hyper_parameters.hid_size", None,
-                                       self._namespace)
-        init_low_bound = envs.get_global_env("hyper_parameters.init_low_bound",
-                                             None, self._namespace)
-        init_high_bound = envs.get_global_env(
-            "hyper_parameters.init_high_bound", None, self._namespace)
-        emb_lr_x = envs.get_global_env("hyper_parameters.emb_lr_x", None,
-                                       self._namespace)
-        gru_lr_x = envs.get_global_env("hyper_parameters.gru_lr_x", None,
-                                       self._namespace)
-        fc_lr_x = envs.get_global_env("hyper_parameters.fc_lr_x", None,
-                                      self._namespace)
+    def _init_hyper_parameters(self):
+        self.recall_k = envs.get_global_env("hyper_parameters.recall_k")
+        self.vocab_size = envs.get_global_env("hyper_parameters.vocab_size")
+        self.hid_size = envs.get_global_env("hyper_parameters.hid_size")
+        self.init_low_bound = envs.get_global_env(
+            "hyper_parameters.init_low_bound")
+        self.init_high_bound = envs.get_global_env(
+            "hyper_parameters.init_high_bound")
+        self.emb_lr_x = envs.get_global_env("hyper_parameters.emb_lr_x")
+        self.gru_lr_x = envs.get_global_env("hyper_parameters.gru_lr_x")
+        self.fc_lr_x = envs.get_global_env("hyper_parameters.fc_lr_x")
+
+    def input_data(self, is_infer=False, **kwargs):
         # Input data
         src_wordseq = fluid.data(
             name="src_wordseq", shape=[None, 1], dtype="int64", lod_level=1)
         dst_wordseq = fluid.data(
             name="dst_wordseq", shape=[None, 1], dtype="int64", lod_level=1)

-        if is_infer:
-            self._infer_data_var = [src_wordseq, dst_wordseq]
-            self._infer_data_loader = fluid.io.DataLoader.from_generator(
-                feed_list=self._infer_data_var,
-                capacity=64,
-                use_double_buffer=False,
-                iterable=False)
+        return [src_wordseq, dst_wordseq]
+
+    def net(self, inputs, is_infer=False):
+        src_wordseq = inputs[0]
+        dst_wordseq = inputs[1]

         emb = fluid.embedding(
             input=src_wordseq,
-            size=[vocab_size, hid_size],
+            size=[self.vocab_size, self.hid_size],
             param_attr=fluid.ParamAttr(
                 name="emb",
                 initializer=fluid.initializer.Uniform(
-                    low=init_low_bound, high=init_high_bound),
-                learning_rate=emb_lr_x),
+                    low=self.init_low_bound, high=self.init_high_bound),
+                learning_rate=self.emb_lr_x),
             is_sparse=True)
         fc0 = fluid.layers.fc(input=emb,
-                              size=hid_size * 3,
+                              size=self.hid_size * 3,
                               param_attr=fluid.ParamAttr(
                                   initializer=fluid.initializer.Uniform(
-                                      low=init_low_bound,
-                                      high=init_high_bound),
-                                  learning_rate=gru_lr_x))
+                                      low=self.init_low_bound,
+                                      high=self.init_high_bound),
+                                  learning_rate=self.gru_lr_x))
         gru_h0 = fluid.layers.dynamic_gru(
             input=fc0,
-            size=hid_size,
+            size=self.hid_size,
             param_attr=fluid.ParamAttr(
                 initializer=fluid.initializer.Uniform(
-                    low=init_low_bound, high=init_high_bound),
-                learning_rate=gru_lr_x))
+                    low=self.init_low_bound, high=self.init_high_bound),
+                learning_rate=self.gru_lr_x))

         fc = fluid.layers.fc(input=gru_h0,
-                             size=vocab_size,
+                             size=self.vocab_size,
                              act='softmax',
                              param_attr=fluid.ParamAttr(
                                  initializer=fluid.initializer.Uniform(
-                                     low=init_low_bound, high=init_high_bound),
-                                 learning_rate=fc_lr_x))
+                                     low=self.init_low_bound,
+                                     high=self.init_high_bound),
+                                 learning_rate=self.fc_lr_x))
         cost = fluid.layers.cross_entropy(input=fc, label=dst_wordseq)
-        acc = fluid.layers.accuracy(input=fc, label=dst_wordseq, k=recall_k)
+        acc = fluid.layers.accuracy(
+            input=fc, label=dst_wordseq, k=self.recall_k)

         if is_infer:
             self._infer_results['recall20'] = acc
             return

         avg_cost = fluid.layers.mean(x=cost)
-        self._data_var.append(src_wordseq)
-        self._data_var.append(dst_wordseq)
         self._cost = avg_cost
         self._metrics["cost"] = avg_cost
         self._metrics["acc"] = acc
-
-    def train_net(self):
-        self.all_vocab_network()
-
-    def infer_net(self):
-        self.all_vocab_network(is_infer=True)
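Reviewer note: what fluid.layers.accuracy(..., k=recall_k) measures here, written out in numpy: the fraction of target next-items that land in the top-k of the softmax over the vocabulary. Scores and targets are made up:

```python
import numpy as np

scores = np.array([[0.1, 0.5, 0.3, 0.1],
                   [0.6, 0.2, 0.1, 0.1]])  # fc output, one row per position
targets = np.array([2, 3])                 # dst_wordseq ids
k = 2

topk = np.argsort(-scores, axis=1)[:, :k]  # indices of the k highest scores
recall_at_k = np.mean([t in row for t, row in zip(targets, topk)])
print(recall_at_k)  # 0.5 -> row 0 hits (2 in [1, 2]), row 1 misses
```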
models/recall/gru4rec/rsc15_infer_reader.py

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

from paddlerec.core.reader import Reader


class EvaluateReader(Reader):
    def init(self):
        pass

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def reader():
            """
            This function needs to be implemented by the user, based on data format
            """
            l = line.strip().split()
            l = [w for w in l]
            src_seq = l[:len(l) - 1]
            src_seq = [int(e) for e in src_seq]
            trg_seq = l[1:]
            trg_seq = [int(e) for e in trg_seq]
            feature_name = ["src_wordseq", "dst_wordseq"]
            yield zip(feature_name, [src_seq] + [trg_seq])

        return reader
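A standalone sketch of the src/dst construction above: for a session of item ids, the source is the sequence minus the last click and the target is the same sequence shifted by one. The session line is made up:

```python
line = "214 215 216 217"  # made-up session of item ids

ids = [int(w) for w in line.strip().split()]
src_seq, trg_seq = ids[:-1], ids[1:]
print(src_seq)  # [214, 215, 216]
print(trg_seq)  # [215, 216, 217]
```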
models/recall/ncf/config.yaml
@@ -12,42 +12,56 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-evaluate:
-  reader:
-    batch_size: 1
-    class: "{workspace}/movielens_infer_reader.py"
-    test_data_path: "{workspace}/data/test"
-
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
-
-  epochs: 3
-  workspace: "paddlerec.models.recall.ncf"
-  device: cpu
-
-  reader:
-    batch_size: 2
-    class: "{workspace}/movielens_reader.py"
-    train_data_path: "{workspace}/data/train"
-
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      num_users: 6040
-      num_items: 3706
-      latent_dim: 8
-      layers: [64, 32, 16, 8]
-      learning_rate: 0.001
-      optimizer: adam
-
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+workspace: "paddlerec.models.recall.ncf"
+
+dataset:
+- name: dataset_train
+  batch_size: 5
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/movielens_reader.py"
+- name: dataset_infer
+  batch_size: 5
+  type: QueueDataset
+  data_path: "{workspace}/data/test"
+  data_converter: "{workspace}/movielens_infer_reader.py"
+
+hyper_parameters:
+  num_users: 6040
+  num_items: 3706
+  latent_dim: 8
+  fc_layers: [64, 32, 16, 8]
+  optimizer:
+    class: adam
+    learning_rate: 0.001
+    strategy: async
+
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+  print_interval: 10
+- name: infer_runner
+  class: single_infer
+  init_model_path: "increment/0"
+  device: cpu
+  epochs: 3
+
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
+#- name: infer
+#  model: "{workspace}/model.py"
+#  dataset_name: dataset_infer
+#  thread_num: 1
models/recall/ncf/model.py
@@ -24,7 +24,13 @@ class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)

-    def input_data(self, is_infer=False):
+    def _init_hyper_parameters(self):
+        self.num_users = envs.get_global_env("hyper_parameters.num_users")
+        self.num_items = envs.get_global_env("hyper_parameters.num_items")
+        self.latent_dim = envs.get_global_env("hyper_parameters.latent_dim")
+        self.layers = envs.get_global_env("hyper_parameters.fc_layers")
+
+    def input_data(self, is_infer=False, **kwargs):
         user_input = fluid.data(
             name="user_input", shape=[-1, 1], dtype="int64", lod_level=0)
         item_input = fluid.data(
@@ -35,45 +41,35 @@ class Model(ModelBase):
             inputs = [user_input] + [item_input]
         else:
             inputs = [user_input] + [item_input] + [label]
-        self._data_var = inputs

         return inputs

     def net(self, inputs, is_infer=False):
-        num_users = envs.get_global_env("hyper_parameters.num_users", None,
-                                        self._namespace)
-        num_items = envs.get_global_env("hyper_parameters.num_items", None,
-                                        self._namespace)
-        latent_dim = envs.get_global_env("hyper_parameters.latent_dim", None,
-                                         self._namespace)
-        layers = envs.get_global_env("hyper_parameters.layers", None,
-                                     self._namespace)
-
-        num_layer = len(layers)  #Number of layers in the MLP
+        num_layer = len(self.layers)  #Number of layers in the MLP

         MF_Embedding_User = fluid.embedding(
             input=inputs[0],
-            size=[num_users, latent_dim],
+            size=[self.num_users, self.latent_dim],
             param_attr=fluid.initializer.Normal(
                 loc=0.0, scale=0.01),
             is_sparse=True)
         MF_Embedding_Item = fluid.embedding(
             input=inputs[1],
-            size=[num_items, latent_dim],
+            size=[self.num_items, self.latent_dim],
             param_attr=fluid.initializer.Normal(
                 loc=0.0, scale=0.01),
             is_sparse=True)

         MLP_Embedding_User = fluid.embedding(
             input=inputs[0],
-            size=[num_users, int(layers[0] / 2)],
+            size=[self.num_users, int(self.layers[0] / 2)],
             param_attr=fluid.initializer.Normal(
                 loc=0.0, scale=0.01),
             is_sparse=True)
         MLP_Embedding_Item = fluid.embedding(
             input=inputs[1],
-            size=[num_items, int(layers[0] / 2)],
+            size=[self.num_items, int(self.layers[0] / 2)],
             param_attr=fluid.initializer.Normal(
                 loc=0.0, scale=0.01),
             is_sparse=True)
@@ -94,7 +90,7 @@ class Model(ModelBase):
         for i in range(1, num_layer):
             mlp_vector = fluid.layers.fc(
                 input=mlp_vector,
-                size=layers[i],
+                size=self.layers[i],
                 act='relu',
                 param_attr=fluid.ParamAttr(
                     initializer=fluid.initializer.TruncatedNormal(
@@ -126,16 +122,3 @@ class Model(ModelBase):
         self._cost = avg_cost
         self._metrics["cost"] = avg_cost
-
-    def train_net(self):
-        input_data = self.input_data()
-        self.net(input_data)
-
-    def infer_net(self):
-        self._infer_data_var = self.input_data(is_infer=True)
-        self._infer_data_loader = fluid.io.DataLoader.from_generator(
-            feed_list=self._infer_data_var,
-            capacity=64,
-            use_double_buffer=False,
-            iterable=False)
-        self.net(self._infer_data_var, is_infer=True)
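Reviewer note: a numpy sketch of the NeuMF structure the embeddings above feed, per the NCF paper this model follows: a GMF branch (elementwise product of user/item vectors) concatenated with an MLP branch. Random vectors stand in for the learned embeddings:

```python
import numpy as np

rng = np.random.default_rng(0)
latent_dim, mlp_dim = 8, 32   # latent_dim: 8; fc_layers[0] / 2 = 32 per branch input

mf_user, mf_item = rng.normal(size=(2, latent_dim))
mlp_user, mlp_item = rng.normal(size=(2, mlp_dim))

gmf_vector = mf_user * mf_item                     # GMF: elementwise product
mlp_vector = np.concatenate([mlp_user, mlp_item])  # MLP input: concat, size fc_layers[0]
predict_vector = np.concatenate([gmf_vector, mlp_vector])
print(predict_vector.shape)  # (72,) -> fed to the final prediction layer
```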
models/recall/ncf/movielens_infer_reader.py
@@ -19,7 +19,7 @@ from collections import defaultdict
 import numpy as np


-class EvaluateReader(Reader):
+class TrainReader(Reader):
     def init(self):
         pass
...
models/recall/ssr/config.yaml
@@ -12,43 +12,55 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-evaluate:
-  reader:
-    batch_size: 1
-    class: "{workspace}/ssr_infer_reader.py"
-    test_data_path: "{workspace}/data/train"
-    is_return_numpy: True
-
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
-
-  epochs: 3
-  workspace: "paddlerec.models.recall.ssr"
-  device: cpu
-
-  reader:
-    batch_size: 5
-    class: "{workspace}/ssr_reader.py"
-    train_data_path: "{workspace}/data/train"
-
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      vocab_size: 1000
-      emb_dim: 128
-      hidden_size: 100
-      learning_rate: 0.01
-      optimizer: adagrad
-
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+workspace: "paddlerec.models.recall.ssr"
+
+dataset:
+- name: dataset_train
+  batch_size: 5
+  type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/ssr_reader.py"
+- name: dataset_infer
+  batch_size: 5
+  type: QueueDataset
+  data_path: "{workspace}/data/test"
+  data_converter: "{workspace}/ssr_infer_reader.py"
+
+hyper_parameters:
+  vocab_size: 1000
+  emb_dim: 128
+  hidden_size: 100
+  optimizer:
+    class: adagrad
+    learning_rate: 0.01
+    strategy: async
+
+#use infer_runner mode and modify 'phase' below if infer
+mode: train_runner
+#mode: infer_runner
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+  print_interval: 10
+- name: infer_runner
+  class: single_infer
+  init_model_path: "increment/0"
+  device: cpu
+  epochs: 3
+
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
+#- name: infer
+#  model: "{workspace}/model.py"
+#  dataset_name: dataset_infer
+#  thread_num: 1
models/recall/ssr/model.py
@@ -20,84 +20,44 @@ from paddlerec.core.utils import envs
 from paddlerec.core.model import Model as ModelBase

-class BowEncoder(object):
-    """ bow-encoder """
-
-    def __init__(self):
-        self.param_name = ""
-
-    def forward(self, emb):
-        return fluid.layers.sequence_pool(input=emb, pool_type='sum')
-
-
-class GrnnEncoder(object):
-    """ grnn-encoder """
-
-    def __init__(self, param_name="grnn", hidden_size=128):
-        self.param_name = param_name
-        self.hidden_size = hidden_size
-
-    def forward(self, emb):
-        fc0 = fluid.layers.fc(input=emb,
-                              size=self.hidden_size * 3,
-                              param_attr=self.param_name + "_fc.w",
-                              bias_attr=False)
-        gru_h = fluid.layers.dynamic_gru(
-            input=fc0,
-            size=self.hidden_size,
-            is_reverse=False,
-            param_attr=self.param_name + ".param",
-            bias_attr=self.param_name + ".bias")
-        return fluid.layers.sequence_pool(input=gru_h, pool_type='max')
-
-
-class PairwiseHingeLoss(object):
-    def __init__(self, margin=0.8):
-        self.margin = margin
-
-    def forward(self, pos, neg):
-        loss_part1 = fluid.layers.elementwise_sub(
-            tensor.fill_constant_batch_size_like(
-                input=pos, shape=[-1, 1], value=self.margin, dtype='float32'),
-            pos)
-        loss_part2 = fluid.layers.elementwise_add(loss_part1, neg)
-        loss_part3 = fluid.layers.elementwise_max(
-            tensor.fill_constant_batch_size_like(
-                input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
-            loss_part2)
-        return loss_part3
-
-
 class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)

-    def get_correct(self, x, y):
-        less = tensor.cast(cf.less_than(x, y), dtype='float32')
-        correct = fluid.layers.reduce_sum(less)
-        return correct
-
-    def train(self):
-        vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None,
-                                         self._namespace)
-        emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None,
-                                      self._namespace)
-        hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None,
-                                          self._namespace)
-        emb_shape = [vocab_size, emb_dim]
-
-        self.user_encoder = GrnnEncoder()
-        self.item_encoder = BowEncoder()
-        self.pairwise_hinge_loss = PairwiseHingeLoss()
-
-        user_data = fluid.data(
-            name="user", shape=[None, 1], dtype="int64", lod_level=1)
-        pos_item_data = fluid.data(
-            name="p_item", shape=[None, 1], dtype="int64", lod_level=1)
-        neg_item_data = fluid.data(
-            name="n_item", shape=[None, 1], dtype="int64", lod_level=1)
-        self._data_var.extend([user_data, pos_item_data, neg_item_data])
+    def _init_hyper_parameters(self):
+        self.vocab_size = envs.get_global_env("hyper_parameters.vocab_size")
+        self.emb_dim = envs.get_global_env("hyper_parameters.emb_dim")
+        self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size")
+
+    def input_data(self, is_infer=False, **kwargs):
+        if is_infer:
+            user_data = fluid.data(
+                name="user", shape=[None, 1], dtype="int64", lod_level=1)
+            all_item_data = fluid.data(
+                name="all_item", shape=[None, self.vocab_size], dtype="int64")
+            pos_label = fluid.data(
+                name="pos_label", shape=[None, 1], dtype="int64")
+            return [user_data, all_item_data, pos_label]
+        else:
+            user_data = fluid.data(
+                name="user", shape=[None, 1], dtype="int64", lod_level=1)
+            pos_item_data = fluid.data(
+                name="p_item", shape=[None, 1], dtype="int64", lod_level=1)
+            neg_item_data = fluid.data(
+                name="n_item", shape=[None, 1], dtype="int64", lod_level=1)
+            return [user_data, pos_item_data, neg_item_data]
+
+    def net(self, inputs, is_infer=False):
+        if is_infer:
+            self._infer_net(inputs)
+            return
+        user_data = inputs[0]
+        pos_item_data = inputs[1]
+        neg_item_data = inputs[2]
+        emb_shape = [self.vocab_size, self.emb_dim]
+        self.user_encoder = GrnnEncoder()
+        self.item_encoder = BowEncoder()
+        self.pairwise_hinge_loss = PairwiseHingeLoss()

         user_emb = fluid.embedding(
             input=user_data, size=emb_shape, param_attr="emb.item")
@@ -109,79 +69,115 @@ class Model(ModelBase):
         pos_item_enc = self.item_encoder.forward(pos_item_emb)
         neg_item_enc = self.item_encoder.forward(neg_item_emb)
         user_hid = fluid.layers.fc(input=user_enc,
-                                   size=hidden_size,
+                                   size=self.hidden_size,
                                    param_attr='user.w',
                                    bias_attr="user.b")
         pos_item_hid = fluid.layers.fc(input=pos_item_enc,
-                                       size=hidden_size,
+                                       size=self.hidden_size,
                                        param_attr='item.w',
                                        bias_attr="item.b")
         neg_item_hid = fluid.layers.fc(input=neg_item_enc,
-                                       size=hidden_size,
+                                       size=self.hidden_size,
                                        param_attr='item.w',
                                        bias_attr="item.b")
         cos_pos = fluid.layers.cos_sim(user_hid, pos_item_hid)
         cos_neg = fluid.layers.cos_sim(user_hid, neg_item_hid)
         hinge_loss = self.pairwise_hinge_loss.forward(cos_pos, cos_neg)
         avg_cost = fluid.layers.mean(hinge_loss)
-        correct = self.get_correct(cos_neg, cos_pos)
+        correct = self._get_correct(cos_neg, cos_pos)

         self._cost = avg_cost
         self._metrics["correct"] = correct
         self._metrics["hinge_loss"] = hinge_loss

-    def train_net(self):
-        self.train()
-
-    def infer(self):
-        vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None,
-                                         self._namespace)
-        emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None,
-                                      self._namespace)
-        hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None,
-                                          self._namespace)
-
-        user_data = fluid.data(
-            name="user", shape=[None, 1], dtype="int64", lod_level=1)
-        all_item_data = fluid.data(
-            name="all_item", shape=[None, vocab_size], dtype="int64")
-        pos_label = fluid.data(
-            name="pos_label", shape=[None, 1], dtype="int64")
-        self._infer_data_var = [user_data, all_item_data, pos_label]
-        self._infer_data_loader = fluid.io.DataLoader.from_generator(
-            feed_list=self._infer_data_var,
-            capacity=64,
-            use_double_buffer=False,
-            iterable=False)
+    def _infer_net(self, inputs):
+        user_data = inputs[0]
+        all_item_data = inputs[1]
+        pos_label = inputs[2]

         user_emb = fluid.embedding(
-            input=user_data, size=[vocab_size, emb_dim], param_attr="emb.item")
+            input=user_data,
+            size=[self.vocab_size, self.emb_dim],
+            param_attr="emb.item")
         all_item_emb = fluid.embedding(
             input=all_item_data,
-            size=[vocab_size, emb_dim],
+            size=[self.vocab_size, self.emb_dim],
             param_attr="emb.item")
         all_item_emb_re = fluid.layers.reshape(
-            x=all_item_emb, shape=[-1, emb_dim])
+            x=all_item_emb, shape=[-1, self.emb_dim])

         user_encoder = GrnnEncoder()
         user_enc = user_encoder.forward(user_emb)
         user_hid = fluid.layers.fc(input=user_enc,
-                                   size=hidden_size,
+                                   size=self.hidden_size,
                                    param_attr='user.w',
                                    bias_attr="user.b")
         user_exp = fluid.layers.expand(
-            x=user_hid, expand_times=[1, vocab_size])
-        user_re = fluid.layers.reshape(x=user_exp, shape=[-1, hidden_size])
+            x=user_hid, expand_times=[1, self.vocab_size])
+        user_re = fluid.layers.reshape(
+            x=user_exp, shape=[-1, self.hidden_size])

         all_item_hid = fluid.layers.fc(input=all_item_emb_re,
-                                       size=hidden_size,
+                                       size=self.hidden_size,
                                        param_attr='item.w',
                                        bias_attr="item.b")
         cos_item = fluid.layers.cos_sim(X=all_item_hid, Y=user_re)
-        all_pre_ = fluid.layers.reshape(x=cos_item, shape=[-1, vocab_size])
+        all_pre_ = fluid.layers.reshape(
+            x=cos_item, shape=[-1, self.vocab_size])
         acc = fluid.layers.accuracy(input=all_pre_, label=pos_label, k=20)
         self._infer_results['recall20'] = acc

-    def infer_net(self):
-        self.infer()
+    def _get_correct(self, x, y):
+        less = tensor.cast(cf.less_than(x, y), dtype='float32')
+        correct = fluid.layers.reduce_sum(less)
+        return correct
+
+
+class BowEncoder(object):
+    """ bow-encoder """
+
+    def __init__(self):
+        self.param_name = ""
+
+    def forward(self, emb):
+        return fluid.layers.sequence_pool(input=emb, pool_type='sum')
+
+
+class GrnnEncoder(object):
+    """ grnn-encoder """
+
+    def __init__(self, param_name="grnn", hidden_size=128):
+        self.param_name = param_name
+        self.hidden_size = hidden_size
+
+    def forward(self, emb):
+        fc0 = fluid.layers.fc(input=emb,
+                              size=self.hidden_size * 3,
+                              param_attr=self.param_name + "_fc.w",
+                              bias_attr=False)
+        gru_h = fluid.layers.dynamic_gru(
+            input=fc0,
+            size=self.hidden_size,
+            is_reverse=False,
+            param_attr=self.param_name + ".param",
+            bias_attr=self.param_name + ".bias")
+        return fluid.layers.sequence_pool(input=gru_h, pool_type='max')
+
+
+class PairwiseHingeLoss(object):
+    def __init__(self, margin=0.8):
+        self.margin = margin
+
+    def forward(self, pos, neg):
+        loss_part1 = fluid.layers.elementwise_sub(
+            tensor.fill_constant_batch_size_like(
+                input=pos, shape=[-1, 1], value=self.margin, dtype='float32'),
+            pos)
+        loss_part2 = fluid.layers.elementwise_add(loss_part1, neg)
+        loss_part3 = fluid.layers.elementwise_max(
+            tensor.fill_constant_batch_size_like(
+                input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
+            loss_part2)
+        return loss_part3
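Reviewer note: PairwiseHingeLoss.forward above, reduced to numpy: loss = max(0, margin − cos_pos + cos_neg), which pushes the positive item's similarity above the negative's by at least the margin. Scores are made up:

```python
import numpy as np

margin = 0.8
cos_pos = np.array([0.9, 0.3])
cos_neg = np.array([0.1, 0.4])
loss = np.maximum(margin - cos_pos + cos_neg, 0.0)
print(loss)  # [0.  0.9] -> the first pair is already separated by the margin
```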
models/recall/youtube_dnn/config.yaml
@@ -13,37 +13,42 @@
 # limitations under the License.

-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
-
-  epochs: 3
-  workspace: "paddlerec.models.recall.youtube_dnn"
-  device: cpu
-
-  reader:
-    batch_size: 2
-    class: "{workspace}/random_reader.py"
-    train_data_path: "{workspace}/data/train"
-
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      watch_vec_size: 64
-      search_vec_size: 64
-      other_feat_size: 64
-      output_size: 100
-      layers: [128, 64, 32]
-      learning_rate: 0.01
-      optimizer: sgd
-
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 2
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 4
-      save_last: True
+workspace: "paddlerec.models.recall.youtube_dnn"
+
+dataset:
+- name: dataset_train
+  batch_size: 5
+  type: DataLoader
+  #type: QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/random_reader.py"
+
+hyper_parameters:
+  watch_vec_size: 64
+  search_vec_size: 64
+  other_feat_size: 64
+  output_size: 100
+  layers: [128, 64, 32]
+  optimizer:
+    class: adam
+    learning_rate: 0.001
+    strategy: async
+
+mode: train_runner
+
+runner:
+- name: train_runner
+  class: single_train
+  device: cpu
+  epochs: 3
+  save_checkpoint_interval: 2
+  save_inference_interval: 4
+  save_checkpoint_path: "increment"
+  save_inference_path: "inference"
+  print_interval: 10
+
+phase:
+- name: train
+  model: "{workspace}/model.py"
+  dataset_name: dataset_train
+  thread_num: 1
# limitations under the License.

import math

import numpy as np
import paddle.fluid as fluid

from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase


class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _init_hyper_parameters(self):
        self.watch_vec_size = envs.get_global_env(
            "hyper_parameters.watch_vec_size")
        self.search_vec_size = envs.get_global_env(
            "hyper_parameters.search_vec_size")
        self.other_feat_size = envs.get_global_env(
            "hyper_parameters.other_feat_size")
        self.output_size = envs.get_global_env("hyper_parameters.output_size")
        self.layers = envs.get_global_env("hyper_parameters.layers")

    def input_data(self, is_infer=False, **kwargs):
        watch_vec = fluid.data(
            name="watch_vec",
            shape=[None, self.watch_vec_size],
            dtype="float32")
        search_vec = fluid.data(
            name="search_vec",
            shape=[None, self.search_vec_size],
            dtype="float32")
        other_feat = fluid.data(
            name="other_feat",
            shape=[None, self.other_feat_size],
            dtype="float32")
        label = fluid.data(name="label", shape=[None, 1], dtype="int64")
        inputs = [watch_vec] + [search_vec] + [other_feat] + [label]
        return inputs

    def net(self, inputs, is_infer=False):
        concat_feats = fluid.layers.concat(input=inputs[:-1], axis=-1)

        l1 = self._fc('l1', concat_feats, self.layers[0], 'relu')
        l2 = self._fc('l2', l1, self.layers[1], 'relu')
        l3 = self._fc('l3', l2, self.layers[2], 'relu')
        l4 = self._fc('l4', l3, self.output_size, 'softmax')

        num_seqs = fluid.layers.create_tensor(dtype='int64')
        acc = fluid.layers.accuracy(input=l4, label=inputs[-1], total=num_seqs)
        cost = fluid.layers.cross_entropy(input=l4, label=inputs[-1])
        avg_cost = fluid.layers.mean(cost)

        self._cost = avg_cost
        self._metrics["acc"] = acc

    def _fc(self, tag, data, out_dim, active='relu'):
        init_stddev = 1.0
        scales = 1.0 / np.sqrt(data.shape[1])
        # ... (the middle of this helper is elided in the diff) ...
            bias_attr=b_attr,
            name=tag)
        return out
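The diff elides the middle of `_fc`, where the parameter attributes `p_attr` and `b_attr` are built and fed into the dense layer. For orientation only, a plausible reconstruction of that pattern follows — the `ParamAttr` names, the constant bias value, and the initializer choice are assumptions, not the repository's actual lines:

```python
import numpy as np
import paddle.fluid as fluid


def _fc(tag, data, out_dim, active='relu'):
    # Scale a normal initializer by 1/sqrt(fan_in), as the visible
    # init_stddev/scales lines suggest.
    init_stddev = 1.0
    scales = init_stddev / np.sqrt(data.shape[1])
    p_attr = fluid.ParamAttr(
        name='%s_weight' % tag,  # assumed naming scheme
        initializer=fluid.initializer.NormalInitializer(
            loc=0.0, scale=scales))
    b_attr = fluid.ParamAttr(
        name='%s_bias' % tag,  # assumed naming scheme
        initializer=fluid.initializer.Constant(value=0.1))  # assumed value
    out = fluid.layers.fc(input=data,
                          size=out_dim,
                          act=active,
                          param_attr=p_attr,
                          bias_attr=b_attr,
                          name=tag)
    return out
```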
# limitations under the License.

from __future__ import print_function

import numpy as np

from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict


class TrainReader(Reader):
    def init(self):
        self.watch_vec_size = envs.get_global_env(
            "hyper_parameters.watch_vec_size")
        self.search_vec_size = envs.get_global_env(
            "hyper_parameters.search_vec_size")
        self.other_feat_size = envs.get_global_env(
            "hyper_parameters.other_feat_size")
        self.output_size = envs.get_global_env("hyper_parameters.output_size")

    def generate_sample(self, line):
        """
        ...
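`generate_sample` is cut off by the diff. A minimal sketch of a synthetic generator consistent with the sizes read in `init()` and with the model's `fluid.data` slot names might look like this; the random feature values are an assumption, not the repository's actual reader:

```python
import numpy as np


# Sketch of the TrainReader method above; `self` carries the sizes from init().
def generate_sample(self, line):
    def reader():
        # One random sample per input line; slot names match the model's
        # fluid.data inputs (watch_vec, search_vec, other_feat, label).
        watch_vec = np.random.rand(self.watch_vec_size).tolist()
        search_vec = np.random.rand(self.search_vec_size).tolist()
        other_feat = np.random.rand(self.other_feat_size).tolist()
        label = [np.random.randint(self.output_size)]  # class id in [0, output_size)
        yield [('watch_vec', watch_vec), ('search_vec', search_vec),
               ('other_feat', other_feat), ('label', label)]

    return reader
```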
# limitations under the License.

workspace: "paddlerec.models.rerank.listwise"

dataset:
- name: dataset_train
  type: DataLoader
  data_path: "{workspace}/data/train"
  data_converter: "{workspace}/random_reader.py"
- name: dataset_infer
  type: DataLoader
  data_path: "{workspace}/data/test"
  data_converter: "{workspace}/random_reader.py"

hyper_parameters:
  hidden_size: 128
  user_vocab: 200
  item_vocab: 1000
  item_len: 5
  embed_size: 16
  batch_size: 1
  optimizer:
    class: sgd
    learning_rate: 0.01
    strategy: async

# use infer_runner mode and modify 'phase' below if infer
mode: train_runner
#mode: infer_runner

runner:
- name: train_runner
  class: single_train
  device: cpu
  epochs: 3
  save_checkpoint_interval: 2
  save_inference_interval: 4
  save_checkpoint_path: "increment"
  save_inference_path: "inference"
- name: infer_runner
  class: single_infer
  init_model_path: "increment/0"
  device: cpu
  epochs: 3

phase:
- name: train
  model: "{workspace}/model.py"
  dataset_name: dataset_train
  thread_num: 1
#- name: infer
#  model: "{workspace}/model.py"
#  dataset_name: dataset_infer
#  thread_num: 1
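With the `mode`/`runner`/`phase` wiring above, the model launches through PaddleRec's standard entry point — the same command the README section at the end of this diff documents:

```shell
python -m paddlerec.run -m paddlerec.models.rerank.listwise
```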
class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)

    def _init_hyper_parameters(self):
        # NOTE: the key must match the config entry "hyper_parameters.item_len";
        # the literal "hyper_parameters.self.item_len" in the merged code cannot
        # resolve against this config.
        self.item_len = envs.get_global_env("hyper_parameters.item_len")
        self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size")
        self.user_vocab = envs.get_global_env("hyper_parameters.user_vocab")
        self.item_vocab = envs.get_global_env("hyper_parameters.item_vocab")
        self.embed_size = envs.get_global_env("hyper_parameters.embed_size")

    def input_data(self, is_infer=False, **kwargs):
        user_slot_names = fluid.data(
            name='user_slot_names',
            shape=[None, 1],
            ...
class TrainReader(Reader):
    def init(self):
        self.user_vocab = envs.get_global_env("hyper_parameters.user_vocab")
        self.item_vocab = envs.get_global_env("hyper_parameters.item_vocab")
        self.item_len = envs.get_global_env("hyper_parameters.item_len")
        self.batch_size = envs.get_global_env("hyper_parameters.batch_size")

    def reader_creator(self):
        def reader():
            ...
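The change repeated across every model and reader in this merge is the `envs.get_global_env` calling convention: the old form passed an explicit default and a namespace such as "train.model", while the new flattened config resolves a key globally. Side by side, on one of the keys above:

```python
from paddlerec.core.utils import envs

# Pre-merge: key resolved inside a namespace, with an explicit default.
# user_vocab = envs.get_global_env("hyper_parameters.user_vocab", None, "train.model")

# Post-merge: the key alone identifies the entry in the flattened yaml.
user_vocab = envs.get_global_env("hyper_parameters.user_vocab")
```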
* [Overview](#整体介绍)
    * [Re-ranking model list](#重排序模型列表)
* [Tutorial](#使用教程)

## Overview

### Re-ranking model list

...

<p>

## Tutorial (quick start)

```shell
python -m paddlerec.run -m paddlerec.models.rerank.listwise # listwise
```

## Tutorial (reproducing the paper)

The original listwise paper does not release its training data, so random data is used here; see the quick start above.